From 6dbccee90b6f53d593337c83ee98262cdb701317 Mon Sep 17 00:00:00 2001 From: Charlie Le <3375195+CharlieTLe@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:48:46 -0700 Subject: [PATCH 1/6] Add logic for handling default filesystem root dir We started using a BucketClient for the filesystem where the default directory is an empty string: https://github.com/cortexproject/cortex/blob/af9e20c54ee97f409008f3e86541c5dfa5038e22/pkg/storage/bucket/filesystem/config.go#L17 When we create the new bucket, https://github.com/cortexproject/cortex/blob/526a6d935a948119cf74033a9f79391786022222/vendor/github.com/thanos-io/objstore/providers/filesystem/filesystem.go#L46 uses `filepath.Abs("")` which will use the current working directory as the root directory. Fixes: https://github.com/cortexproject/cortex/issues/6219 Signed-off-by: Charlie Le --- pkg/cortex/modules.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index 9e72894f9e0..9489fab2ec7 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -160,6 +160,20 @@ func (t *Cortex) initRuntimeConfig() (services.Service, error) { registerer := prometheus.WrapRegistererWithPrefix("cortex_", prometheus.DefaultRegisterer) logger := util_log.Logger bucketClientFactory := func(ctx context.Context) (objstore.Bucket, error) { + // When directory is an empty string but the runtime-config.file is an absolute path, + // the filesystem.NewBucketClient will treat it as a relative path based on the current working directory + // that the process is running in. + if t.Cfg.RuntimeConfig.StorageConfig.Backend == bucket.Filesystem { + if t.Cfg.RuntimeConfig.StorageConfig.Filesystem.Directory == "" { + // Check if runtime-config.file is an absolute path + if t.Cfg.RuntimeConfig.LoadPath[0] == '/' { + // If it is, set the directory to the root directory so that the filesystem bucket + // will treat it as an absolute path. This is to maintain backwards compatibility + // with the previous behavior of the runtime-config.file of allowing relative and absolute paths. + t.Cfg.RuntimeConfig.StorageConfig.Filesystem.Directory = "/" + } + } + } return bucket.NewClient(ctx, t.Cfg.RuntimeConfig.StorageConfig, "runtime-config", logger, registerer) } serv, err := runtimeconfig.New(t.Cfg.RuntimeConfig, registerer, logger, bucketClientFactory) From c2c6b53614b9ab0235ba726dab0cc26807fc20b9 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Sat, 21 Sep 2024 16:48:10 -0700 Subject: [PATCH 2/6] Update tests to set workDir Allows for testing what happens when cortex is run from a directory that is not the root directory. Signed-off-by: Charlie Le --- integration/e2e/service.go | 11 +++++++++++ integration/runtime_config_test.go | 21 +++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/integration/e2e/service.go b/integration/e2e/service.go index 50fc0b83011..33daf3afdf7 100644 --- a/integration/e2e/service.go +++ b/integration/e2e/service.go @@ -48,6 +48,9 @@ type ConcreteService struct { // docker NetworkName used to start this container. // If empty it means service is stopped. usedNetworkName string + + // workDir is the working directory inside the container + workDir string } func NewConcreteService( @@ -92,6 +95,10 @@ func (s *ConcreteService) SetUser(user string) { s.user = user } +func (s *ConcreteService) SetWorkDir(workDir string) { + s.workDir = workDir +} + func (s *ConcreteService) Start(networkName, sharedDir string) (err error) { // In case of any error, if the container was already created, we // have to cleanup removing it. We ignore the error of the "docker rm" @@ -309,6 +316,10 @@ func (s *ConcreteService) buildDockerRunArgs(networkName, sharedDir string) []st args = append(args, "--user", s.user) } + if s.workDir != "" { + args = append(args, "--workdir", s.workDir) + } + // Published ports for _, port := range s.networkPorts { args = append(args, "-p", strconv.Itoa(port)) diff --git a/integration/runtime_config_test.go b/integration/runtime_config_test.go index 7f9f0ceb302..bdc0819e388 100644 --- a/integration/runtime_config_test.go +++ b/integration/runtime_config_test.go @@ -33,8 +33,9 @@ func TestLoadRuntimeConfigFromStorageBackend(t *testing.T) { filePath := filepath.Join(e2e.ContainerSharedDir, runtimeConfigFile) tests := []struct { - name string - flags map[string]string + name string + flags map[string]string + workDir string }{ { name: "no storage backend provided", @@ -49,11 +50,27 @@ func TestLoadRuntimeConfigFromStorageBackend(t *testing.T) { "-runtime-config.backend": "filesystem", }, }, + { + name: "runtime-config.file is a relative path", + flags: map[string]string{ + "-runtime-config.file": runtimeConfigFile, + }, + workDir: e2e.ContainerSharedDir, + }, + { + name: "runtime-config.file is an absolute path but working directory is not /", + flags: map[string]string{ + "-runtime-config.file": filePath, + }, + workDir: "/var/lib/cortex", + }, } for i, tt := range tests { t.Run(tt.name, func(t *testing.T) { cortexSvc := e2ecortex.NewSingleBinaryWithConfigFile(fmt.Sprintf("cortex-%d", i), cortexConfigFile, tt.flags, "", 9009, 9095) + cortexSvc.SetWorkDir(tt.workDir) + require.NoError(t, s.StartAndWaitReady(cortexSvc)) assertRuntimeConfigLoadedCorrectly(t, cortexSvc) From ebd158fbeb0d4719a80e9e03f0fd553023fd6dd2 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Sat, 21 Sep 2024 16:54:30 -0700 Subject: [PATCH 3/6] Update changelog Signed-off-by: Charlie Le --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61f8f3703f2..c65f8e14ce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * [ENHANCEMENT] Ingester: Add new API `/ingester/all_user_stats` which shows loaded blocks, active timeseries and ingestion rate for a specific ingester. #6178 * [ENHANCEMENT] Distributor: Add new `cortex_reduced_resolution_histogram_samples_total` metric to track the number of histogram samples which resolution was reduced. #6182 * [ENHANCEMENT] StoreGateway: Implement metadata API limit in queryable. #6195 +* [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 ## 1.18.0 2024-09-03 From eec447185163aa91e13268cbe5f5084aa1275686 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Sun, 22 Sep 2024 12:51:57 -0700 Subject: [PATCH 4/6] Wait until container is removed after stopping The E2E tests use `docker stop` to stop the container, but the container can still be in the process of being removed after the stop command is executed. This can cause tests that stop and start a container in rapid succession with the same container name to fail with conflicting container names. Example error message in an integration test: ``` 00:08:14 Stopping query-frontend 00:08:14 Stopping store-gateway === RUN TestNewDistributorsCanPushToOldIngestersWithReplication/Backward_compatibility_upgradir 00:08:14 Starting store-gateway 00:08:14 store-gateway: docker: Error response from daemon: Conflict. The container name "/e2e-cortex-test-store-gateway" is already in use by container "b750a7b7e2e3eebc617ac832de2c8a643f884bbc79b0c8961ceebace8cad5c85". You have to remove (or rename) that container to be able to reuse that name. ``` Signed-off-by: Charlie Le --- integration/e2e/scenario_test.go | 14 ++++++++++++++ integration/e2e/service.go | 16 +++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/integration/e2e/scenario_test.go b/integration/e2e/scenario_test.go index 50b61dbc07f..12249f3900d 100644 --- a/integration/e2e/scenario_test.go +++ b/integration/e2e/scenario_test.go @@ -146,3 +146,17 @@ func TestScenario(t *testing.T) { _, err = bkt.Get(context.Background(), "recipe") require.Error(t, err) } + +// TestStartStop tests for ensuring that when the container is stopped, it can be started again. +// This is to test that the stop waits for the container to be stopped and cleaned up before returning. +func TestStartStop(t *testing.T) { + s, err := e2e.NewScenario("e2e-scenario-test") + require.NoError(t, err) + + m1 := e2edb.NewMinio(9000, bktName) + + for i := 0; i < 10; i++ { + require.NoError(t, s.Start(m1)) + require.NoError(t, s.Stop(m1)) + } +} diff --git a/integration/e2e/service.go b/integration/e2e/service.go index 50fc0b83011..a9d38fee0b7 100644 --- a/integration/e2e/service.go +++ b/integration/e2e/service.go @@ -151,6 +151,9 @@ func (s *ConcreteService) Stop() error { logger.Log(string(out)) return err } + + s.Wait() + s.usedNetworkName = "" return nil @@ -168,15 +171,22 @@ func (s *ConcreteService) Kill() error { return err } - // Wait until the container actually stopped. However, this could fail if - // the container already exited, so we just ignore the error. - _, _ = RunCommandAndGetOutput("docker", "wait", s.containerName()) + s.Wait() s.usedNetworkName = "" return nil } +// Wait waits until the service is stopped. +func (s *ConcreteService) Wait() { + // Wait until the container actually stopped. However, this could fail if + // the container already exited, so we just ignore the error. + if out, err := RunCommandAndGetOutput("docker", "wait", s.containerName()); err != nil { + logger.Log(string(out)) + } +} + // Endpoint returns external (from host perspective) service endpoint (host:port) for given internal port. // External means that it will be accessible only from host, but not from docker containers. // From 44853ee61aea9792810c785a64ba3f141e0ea9c5 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Wed, 25 Sep 2024 16:00:28 -0700 Subject: [PATCH 5/6] Add alertmanager config flags Signed-off-by: Charlie Le --- integration/runtime_config_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/integration/runtime_config_test.go b/integration/runtime_config_test.go index 6ad8041a593..8952b59f7c6 100644 --- a/integration/runtime_config_test.go +++ b/integration/runtime_config_test.go @@ -62,6 +62,10 @@ func TestLoadRuntimeConfigFromStorageBackend(t *testing.T) { name: "runtime-config.file is a relative path", flags: map[string]string{ "-runtime-config.file": runtimeConfigFile, + // alert manager + "-alertmanager.web.external-url": "http://localhost/alertmanager", + "-alertmanager-storage.backend": "local", + "-alertmanager-storage.local.path": filepath.Join(e2e.ContainerSharedDir, "alertmanager_configs"), }, workDir: e2e.ContainerSharedDir, }, @@ -69,6 +73,10 @@ func TestLoadRuntimeConfigFromStorageBackend(t *testing.T) { name: "runtime-config.file is an absolute path but working directory is not /", flags: map[string]string{ "-runtime-config.file": filePath, + // alert manager + "-alertmanager.web.external-url": "http://localhost/alertmanager", + "-alertmanager-storage.backend": "local", + "-alertmanager-storage.local.path": filepath.Join(e2e.ContainerSharedDir, "alertmanager_configs"), }, workDir: "/var/lib/cortex", }, From fbe118b889266e963d194dec8b7c6b5010ec5530 Mon Sep 17 00:00:00 2001 From: Ahmed Hassan <57634502+afhassan@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:42:04 -0700 Subject: [PATCH 6/6] [Querier] Fix Flaky Response Compression Test (#6243) * fix flaky response compression test Signed-off-by: Ahmed Hassan * refactor test helper function Signed-off-by: Ahmed Hassan --------- Signed-off-by: Ahmed Hassan --- .../instantquery/instant_query_test.go | 16 ++++++++++++---- .../tripperware/queryrange/query_range_test.go | 16 +++++++++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pkg/querier/tripperware/instantquery/instant_query_test.go b/pkg/querier/tripperware/instantquery/instant_query_test.go index 0f562909bd6..c9cb6d44af5 100644 --- a/pkg/querier/tripperware/instantquery/instant_query_test.go +++ b/pkg/querier/tripperware/instantquery/instant_query_test.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "net/http" + "sort" "strconv" "testing" "time" @@ -26,6 +27,12 @@ import ( const testHistogramResponse = `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"prometheus_http_request_duration_seconds","handler":"/metrics","instance":"localhost:9090","job":"prometheus"},"histogram":[1719528871.898,{"count":"6342","sum":"43.31319875499995","buckets":[[0,"0.0013810679320049755","0.0015060652591874421","1"],[0,"0.0015060652591874421","0.001642375811042411","7"],[0,"0.001642375811042411","0.0017910235218841233","5"],[0,"0.0017910235218841233","0.001953125","13"],[0,"0.001953125","0.0021298979153618314","19"],[0,"0.0021298979153618314","0.0023226701464896895","13"],[0,"0.0023226701464896895","0.002532889755177753","13"],[0,"0.002532889755177753","0.002762135864009951","15"],[0,"0.002762135864009951","0.0030121305183748843","12"],[0,"0.0030121305183748843","0.003284751622084822","34"],[0,"0.003284751622084822","0.0035820470437682465","188"],[0,"0.0035820470437682465","0.00390625","372"],[0,"0.00390625","0.004259795830723663","400"],[0,"0.004259795830723663","0.004645340292979379","411"],[0,"0.004645340292979379","0.005065779510355506","425"],[0,"0.005065779510355506","0.005524271728019902","425"],[0,"0.005524271728019902","0.0060242610367497685","521"],[0,"0.0060242610367497685","0.006569503244169644","621"],[0,"0.006569503244169644","0.007164094087536493","593"],[0,"0.007164094087536493","0.0078125","506"],[0,"0.0078125","0.008519591661447326","458"],[0,"0.008519591661447326","0.009290680585958758","346"],[0,"0.009290680585958758","0.010131559020711013","285"],[0,"0.010131559020711013","0.011048543456039804","196"],[0,"0.011048543456039804","0.012048522073499537","129"],[0,"0.012048522073499537","0.013139006488339287","85"],[0,"0.013139006488339287","0.014328188175072986","65"],[0,"0.014328188175072986","0.015625","54"],[0,"0.015625","0.01703918332289465","53"],[0,"0.01703918332289465","0.018581361171917516","20"],[0,"0.018581361171917516","0.020263118041422026","21"],[0,"0.020263118041422026","0.022097086912079608","15"],[0,"0.022097086912079608","0.024097044146999074","11"],[0,"0.024097044146999074","0.026278012976678575","2"],[0,"0.026278012976678575","0.028656376350145972","3"],[0,"0.028656376350145972","0.03125","3"],[0,"0.04052623608284405","0.044194173824159216","2"]]}]}]}}` +func sortPrometheusResponseHeader(headers []*tripperware.PrometheusResponseHeader) { + sort.Slice(headers, func(i, j int) bool { + return headers[i].Name < headers[j].Name + }) +} + func TestRequest(t *testing.T) { t.Parallel() codec := InstantQueryCodec @@ -159,10 +166,11 @@ func TestCompressedResponse(t *testing.T) { h.Set("Content-Type", "application/json") } - h.Set("Content-Encoding", tc.compression) if tc.promBody != nil { - tc.promBody.Headers = append(tc.promBody.Headers, &tripperware.PrometheusResponseHeader{Name: "Content-Encoding", Values: []string{"gzip"}}) + tc.promBody.Headers = append(tc.promBody.Headers, &tripperware.PrometheusResponseHeader{Name: "Content-Encoding", Values: []string{tc.compression}}) } + h.Set("Content-Encoding", tc.compression) + responseBody := &bytes.Buffer{} w := gzip.NewWriter(responseBody) _, err := w.Write(b) @@ -179,6 +187,8 @@ func TestCompressedResponse(t *testing.T) { if err == nil { require.NoError(t, err) + sortPrometheusResponseHeader(tc.promBody.Headers) + sortPrometheusResponseHeader(resp.(*tripperware.PrometheusResponse).Headers) require.Equal(t, tc.promBody, resp) } }) @@ -1738,7 +1748,6 @@ func Benchmark_Decode(b *testing.B) { } }) } - } func Benchmark_Decode_Protobuf(b *testing.B) { @@ -1802,5 +1811,4 @@ func Benchmark_Decode_Protobuf(b *testing.B) { } }) } - } diff --git a/pkg/querier/tripperware/queryrange/query_range_test.go b/pkg/querier/tripperware/queryrange/query_range_test.go index f69457d2099..256d2800ff6 100644 --- a/pkg/querier/tripperware/queryrange/query_range_test.go +++ b/pkg/querier/tripperware/queryrange/query_range_test.go @@ -6,6 +6,7 @@ import ( "context" "io" "net/http" + "sort" "strconv" "testing" @@ -23,6 +24,12 @@ import ( "github.com/cortexproject/cortex/pkg/querier/tripperware" ) +func sortPrometheusResponseHeader(headers []*tripperware.PrometheusResponseHeader) { + sort.Slice(headers, func(i, j int) bool { + return headers[i].Name < headers[j].Name + }) +} + func TestRequest(t *testing.T) { t.Parallel() // Create a Copy parsedRequest to assign the expected headers to the request without affecting other tests using the global. @@ -1263,10 +1270,11 @@ func TestCompressedResponse(t *testing.T) { h.Set("Content-Type", tripperware.ApplicationJson) } - h.Set("Content-Encoding", tc.compression) if tc.promBody != nil { - tc.promBody.Headers = append(tc.promBody.Headers, &tripperware.PrometheusResponseHeader{Name: "Content-Encoding", Values: []string{"gzip"}}) + tc.promBody.Headers = append(tc.promBody.Headers, &tripperware.PrometheusResponseHeader{Name: "Content-Encoding", Values: []string{tc.compression}}) } + h.Set("Content-Encoding", tc.compression) + responseBody := &bytes.Buffer{} w := gzip.NewWriter(responseBody) _, err := w.Write(b) @@ -1283,7 +1291,9 @@ func TestCompressedResponse(t *testing.T) { if err == nil { require.NoError(t, err) - require.Equal(t, tc.promBody.Data, resp.(*tripperware.PrometheusResponse).Data) + sortPrometheusResponseHeader(tc.promBody.Headers) + sortPrometheusResponseHeader(resp.(*tripperware.PrometheusResponse).Headers) + require.Equal(t, tc.promBody, resp) } }) }