From 0a2b5c5e391d36a519b72aa5dfa89f3c92942e69 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Thu, 25 Jul 2024 16:59:48 -0400 Subject: [PATCH 1/9] Added ordered results Signed-off-by: Joe Elliott --- modules/frontend/combiner/common.go | 13 + modules/frontend/combiner/common_test.go | 9 +- modules/frontend/combiner/search.go | 187 ++++- modules/frontend/combiner/search_test.go | 740 +++++++++++------- modules/frontend/combiner/trace_by_id_test.go | 4 +- .../frontend/combiner/trace_by_id_v2_test.go | 4 + .../pipeline/async_handler_multitenant.go | 12 +- modules/frontend/pipeline/pipeline.go | 9 + modules/frontend/pipeline/responses.go | 11 + modules/frontend/search_handlers_test.go | 22 +- modules/frontend/search_sharder.go | 287 ++++--- modules/frontend/search_sharder_test.go | 174 +++- modules/ingester/instance_search.go | 35 +- pkg/api/dedicated_columns_to_json.go | 1 + pkg/traceql/combine.go | 93 ++- pkg/traceql/combine_test.go | 51 ++ pkg/traceql/engine.go | 9 +- pkg/traceql/engine_test.go | 4 +- tempodb/blocklist/poller.go | 9 - tempodb/tempodb_test.go | 13 - 20 files changed, 1173 insertions(+), 514 deletions(-) diff --git a/modules/frontend/combiner/common.go b/modules/frontend/combiner/common.go index 8e687406675..339379cd91f 100644 --- a/modules/frontend/combiner/common.go +++ b/modules/frontend/combiner/common.go @@ -21,6 +21,8 @@ type TResponse interface { type PipelineResponse interface { HTTPResponse() *http.Response RequestData() any + + IsMetadata() bool // todo: search and query range pass back metadata responses through a normal http response. update to use this instead. } type genericCombiner[T TResponse] struct { @@ -30,6 +32,7 @@ type genericCombiner[T TResponse] struct { new func() T combine func(partial T, final T, resp PipelineResponse) error + metadata func(resp PipelineResponse, final T) error finalize func(T) (T, error) diff func(T) (T, error) // currently only implemented by the search combiner. required for streaming quit func(T) bool @@ -49,6 +52,16 @@ func initHTTPCombiner[T TResponse](c *genericCombiner[T], marshalingFormat strin // AddResponse is used to add a http response to the combiner. 
func (c *genericCombiner[T]) AddResponse(r PipelineResponse) error { + if r.IsMetadata() && c.metadata != nil { + c.mu.Lock() + defer c.mu.Unlock() + + if err := c.metadata(r, c.current); err != nil { + return fmt.Errorf("error processing metadata: %w", err) + } + return nil + } + res := r.HTTPResponse() if res == nil { return nil diff --git a/modules/frontend/combiner/common_test.go b/modules/frontend/combiner/common_test.go index 8bee69c1bb4..166943e5017 100644 --- a/modules/frontend/combiner/common_test.go +++ b/modules/frontend/combiner/common_test.go @@ -198,7 +198,8 @@ func TestGenericCombinerDoesntRace(t *testing.T) { } type testPipelineResponse struct { - r *http.Response + r *http.Response + responseData any } func newTestResponse(t *testing.T) *testPipelineResponse { @@ -230,7 +231,11 @@ func (p *testPipelineResponse) HTTPResponse() *http.Response { } func (p *testPipelineResponse) RequestData() any { - return nil + return p.responseData +} + +func (p *testPipelineResponse) IsMetadata() bool { + return false } func newTestCombiner() *genericCombiner[*tempopb.ServiceStats] { diff --git a/modules/frontend/combiner/search.go b/modules/frontend/combiner/search.go index a4c44dad859..e1245f78d0f 100644 --- a/modules/frontend/combiner/search.go +++ b/modules/frontend/combiner/search.go @@ -1,7 +1,9 @@ package combiner import ( + "net/http" "sort" + "time" "github.com/grafana/tempo/pkg/api" "github.com/grafana/tempo/pkg/search" @@ -9,46 +11,72 @@ import ( "github.com/grafana/tempo/pkg/traceql" ) +var _ PipelineResponse = (*SearchJobResponse)(nil) + +type SearchShards struct { + TotalJobs uint32 + CompletedThroughSeconds uint32 +} + +type SearchJobResponse struct { + TotalBlocks int + TotalJobs int + TotalBytes uint64 + Shards []SearchShards +} + +func (s *SearchJobResponse) HTTPResponse() *http.Response { + return nil +} + +func (s *SearchJobResponse) RequestData() any { + return nil +} + +func (s *SearchJobResponse) IsMetadata() bool { + return true +} + var _ GRPCCombiner[*tempopb.SearchResponse] = (*genericCombiner[*tempopb.SearchResponse])(nil) // NewSearch returns a search combiner -func NewSearch(limit int) Combiner { - metadataCombiner := traceql.NewMetadataCombiner() +func NewSearch(keepMostRecent int) Combiner { + metadataCombiner := traceql.NewMetadataCombiner(keepMostRecent) diffTraces := map[string]struct{}{} + completedThroughTracker := &ShardCompletionTracker{} c := &genericCombiner[*tempopb.SearchResponse]{ httpStatusCode: 200, new: func() *tempopb.SearchResponse { return &tempopb.SearchResponse{} }, current: &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, - combine: func(partial *tempopb.SearchResponse, final *tempopb.SearchResponse, _ PipelineResponse) error { + combine: func(partial *tempopb.SearchResponse, final *tempopb.SearchResponse, resp PipelineResponse) error { + requestIdx, ok := resp.RequestData().(int) + if ok { + completedThroughTracker.addShardIdx(requestIdx) + } + for _, t := range partial.Traces { - // if we've reached the limit and this is NOT a new trace then skip it - if limit > 0 && - metadataCombiner.Count() >= limit && - !metadataCombiner.Exists(t.TraceID) { - continue + if metadataCombiner.AddMetadata(t) { + // record modified traces + diffTraces[t.TraceID] = struct{}{} } - - metadataCombiner.AddMetadata(t) - // record modified traces - diffTraces[t.TraceID] = struct{}{} } if partial.Metrics != nil { - // there is a coordination with the search sharder here. 
normal responses
-			// will never have total jobs set, but they will have valid Inspected* values
-			// a special response is sent back from the sharder with no traces but valid Total* values
-			// if TotalJobs is nonzero then assume its the special response
-			if partial.Metrics.TotalJobs == 0 {
-				final.Metrics.CompletedJobs++
-
-				final.Metrics.InspectedBytes += partial.Metrics.InspectedBytes
-				final.Metrics.InspectedTraces += partial.Metrics.InspectedTraces
-			} else {
-				final.Metrics.TotalBlocks += partial.Metrics.TotalBlocks
-				final.Metrics.TotalJobs += partial.Metrics.TotalJobs
-				final.Metrics.TotalBlockBytes += partial.Metrics.TotalBlockBytes
-			}
+				final.Metrics.CompletedJobs++
+				final.Metrics.InspectedBytes += partial.Metrics.InspectedBytes
+				final.Metrics.InspectedTraces += partial.Metrics.InspectedTraces
+			}
+
+			return nil
+		},
+		metadata: func(resp PipelineResponse, final *tempopb.SearchResponse) error {
+			if sj, ok := resp.(*SearchJobResponse); ok && sj != nil {
+				final.Metrics.TotalBlocks += uint32(sj.TotalBlocks)
+				final.Metrics.TotalJobs += uint32(sj.TotalJobs)
+				final.Metrics.TotalBlockBytes += sj.TotalBytes
+
+				completedThroughTracker.addShards(sj.Shards)
 			}
 
 			return nil
@@ -67,12 +95,24 @@ func NewSearch(limit int) Combiner {
 				Metrics: current.Metrics,
 			}
 
-			for _, tr := range metadataCombiner.Metadata() {
+			completedThroughSeconds := completedThroughTracker.completedThroughSeconds
+			// if all jobs are completed then let's just return everything the combiner has
+			if current.Metrics.CompletedJobs == current.Metrics.TotalJobs && current.Metrics.TotalJobs > 0 {
+				completedThroughSeconds = 1
+			}
+
+			// if we've not completed any shards, then return nothing
+			if completedThroughSeconds == 0 {
+				return diff, nil
+			}
+
+			for _, tr := range metadataCombiner.MetadataAfter(completedThroughSeconds) {
 				// if not in the map, skip. we haven't seen an update
 				if _, ok := diffTraces[tr.TraceID]; !ok {
 					continue
 				}
+
+				delete(diffTraces, tr.TraceID)
 				diff.Traces = append(diff.Traces, tr)
 			}
 
@@ -82,19 +122,29 @@ func NewSearch(limit int) Combiner {
 
 			addRootSpanNotReceivedText(diff.Traces)
 
-			// wipe out diff traces for the next time
-			clear(diffTraces)
-
 			return diff, nil
 		},
 		// search combiner doesn't use current in the way i would have expected. it only tracks metrics through current and uses the results map for the actual traces.
 		// should we change this?
 		quit: func(_ *tempopb.SearchResponse) bool {
-			if limit <= 0 {
+			// are we tracking a limit at all?
+			if keepMostRecent <= 0 {
+				return false
+			}
+
+			completedThroughSeconds := completedThroughTracker.completedThroughSeconds
+			// have we completed any shards?
+			if completedThroughSeconds == 0 {
+				return false
+			}
+
+			// do we have enough?
+			if metadataCombiner.Count() < keepMostRecent {
 				return false
 			}
 
-			return metadataCombiner.Count() >= limit
+			// is our oldest trace newer than the completedThrough?
+ return metadataCombiner.OldestTimestampNanos() > uint64(completedThroughSeconds)*uint64(time.Second) }, } initHTTPCombiner(c, api.HeaderAcceptJSON) @@ -112,3 +162,76 @@ func addRootSpanNotReceivedText(results []*tempopb.TraceSearchMetadata) { func NewTypedSearch(limit int) GRPCCombiner[*tempopb.SearchResponse] { return NewSearch(limit).(GRPCCombiner[*tempopb.SearchResponse]) } + +// ShardCompletionTracker +type ShardCompletionTracker struct { + shards []SearchShards + foundResponses []int + + completedThroughSeconds uint32 + curShard int +} + +func (s *ShardCompletionTracker) addShards(shards []SearchShards) uint32 { + if len(shards) == 0 { + return s.completedThroughSeconds + } + + s.shards = shards + + // grow foundResponses to match while keeping the existing values + if len(s.shards) > len(s.foundResponses) { + temp := make([]int, len(s.shards)) + copy(temp, s.foundResponses) + s.foundResponses = temp + } + + for s.incrementCurShardIfComplete() { + } + + return s.completedThroughSeconds +} + +// Add adds a response to the tracker and returns the allowed completedThroughSeconds +func (s *ShardCompletionTracker) addShardIdx(shardIdx int) uint32 { + // we haven't received shards yet + if len(s.shards) == 0 { + // if shardIdx doesn't fit in foundResponses then alloc a new slice and copy foundResponses forward + if shardIdx >= len(s.foundResponses) { + temp := make([]int, shardIdx+1) + copy(temp, s.foundResponses) + s.foundResponses = temp + } + + // and record this idx for when we get shards + s.foundResponses[shardIdx]++ + + return 0 + } + + // + if shardIdx >= len(s.foundResponses) { + return s.completedThroughSeconds + } + + s.foundResponses[shardIdx]++ + for s.incrementCurShardIfComplete() { + } + + return s.completedThroughSeconds +} + +func (s *ShardCompletionTracker) incrementCurShardIfComplete() bool { + if s.curShard >= len(s.shards) { + return false + } + + if s.foundResponses[s.curShard] == int(s.shards[s.curShard].TotalJobs) { + s.completedThroughSeconds = s.shards[s.curShard].CompletedThroughSeconds + s.curShard++ + + return true + } + + return false +} diff --git a/modules/frontend/combiner/search_test.go b/modules/frontend/combiner/search_test.go index e1c23724689..bf475b3475a 100644 --- a/modules/frontend/combiner/search_test.go +++ b/modules/frontend/combiner/search_test.go @@ -2,7 +2,6 @@ package combiner import ( "io" - "math" "net/http" "strings" "testing" @@ -39,7 +38,7 @@ func TestSearchProgressShouldQuit(t *testing.T) { // unparseable body should not quit, but should return an error c = NewSearch(0) - err = c.AddResponse(&pipelineResponse{&http.Response{Body: io.NopCloser(strings.NewReader("foo")), StatusCode: 200}}) + err = c.AddResponse(&testPipelineResponse{r: &http.Response{Body: io.NopCloser(strings.NewReader("foo")), StatusCode: 200}}) require.Error(t, err) should = c.ShouldQuit() require.False(t, should) @@ -57,18 +56,55 @@ func TestSearchProgressShouldQuit(t *testing.T) { should = c.ShouldQuit() require.False(t, should) - // over limit should quit + // over limit but no search job response, should not quit c = NewSearch(1) - err = c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ + err = c.AddResponse(toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{ { - TraceID: "1", + TraceID: "1", + StartTimeUnixNano: uint64(100 * time.Second), }, { - TraceID: "2", + TraceID: "2", + StartTimeUnixNano: uint64(200 * time.Second), }, }, - }, 200)) + }, 200, 0)) // 0 is the shard index + require.NoError(t, err) + should 
= c.ShouldQuit() + require.False(t, should) + + // send shards. should not quit b/c completed through is 300 + err = c.AddResponse(&SearchJobResponse{ + TotalJobs: 3, + Shards: []SearchShards{ + { + TotalJobs: 1, + CompletedThroughSeconds: 300, + }, + { + TotalJobs: 1, + CompletedThroughSeconds: 150, + }, + { + TotalJobs: 1, + CompletedThroughSeconds: 50, + }, + }, + }) + require.NoError(t, err) + should = c.ShouldQuit() + require.False(t, should) + + // add complete the second shard. quit should be true b/c completed through is 150, our limit is one and we have a trace at 200 + err = c.AddResponse(toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "3", + StartTimeUnixNano: uint64(50 * time.Second), + }, + }, + }, 200, 1)) // 1 is the shard index require.NoError(t, err) should = c.ShouldQuit() require.True(t, should) @@ -199,14 +235,11 @@ func TestSearchResponseCombiner(t *testing.T) { }, { name: "respects total blocks message", - response1: toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: nil, - Metrics: &tempopb.SearchMetrics{ - TotalBlocks: 5, - TotalJobs: 10, - TotalBlockBytes: 15, - }, - }, 200), + response1: &SearchJobResponse{ + TotalBlocks: 5, + TotalJobs: 10, + TotalBytes: 15, + }, response2: toHTTPResponse(t, &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{ { @@ -309,297 +342,468 @@ func TestSearchResponseCombiner(t *testing.T) { } } -func TestSearchDiffsResults(t *testing.T) { - traceID := "traceID" - - c := NewTypedSearch(10) - sr := toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID, +func TestCombinerDiffs(t *testing.T) { + tests := []struct { + name string + pipelineResponse PipelineResponse + expected *tempopb.SearchResponse + }{ + { + name: "initial state", + pipelineResponse: nil, + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, + Metrics: &tempopb.SearchMetrics{}, }, }, - Metrics: &tempopb.SearchMetrics{}, - }, 200) - expectedDiff := &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID, - RootServiceName: search.RootSpanNotYetReceivedText, + { + name: "add job metadata", + pipelineResponse: &SearchJobResponse{ + TotalBlocks: 5, + TotalJobs: 6, + TotalBytes: 15, + Shards: []SearchShards{ // 5 shards, 2 jobs each. 
starting at 500 seconds and walking back 100 seconds each + { + TotalJobs: 2, + CompletedThroughSeconds: 500, + }, + { + TotalJobs: 1, + CompletedThroughSeconds: 400, + }, + { + TotalJobs: 1, + CompletedThroughSeconds: 300, + }, + { + TotalJobs: 1, + CompletedThroughSeconds: 200, + }, + { + TotalJobs: 1, + CompletedThroughSeconds: 100, + }, + }, + }, + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, + Metrics: &tempopb.SearchMetrics{ + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, }, }, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 1, + { + name: "add response results", + pipelineResponse: toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "450", + RootServiceName: "root-450", + StartTimeUnixNano: uint64(450 * time.Second), + }, + { + TraceID: "550", + RootServiceName: "root-550", + StartTimeUnixNano: uint64(550 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + InspectedBytes: 2, + }, + }, 200, 0), // shard 0 + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, // no traces b/c only one job has finished and the first shard has 2 jobs + Metrics: &tempopb.SearchMetrics{ // metadata is incrementing + CompletedJobs: 1, + InspectedTraces: 1, + InspectedBytes: 2, + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, + }, }, - } - expectedNoDiff := &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{}, - Metrics: &tempopb.SearchMetrics{}, - } - - // haven't added anything yet - actual, err := c.GRPCDiff() - require.NoError(t, err) - require.Equal(t, expectedNoDiff, actual) - - // add a trace and get it back in diff - err = c.AddResponse(sr) - require.NoError(t, err) - - actual, err = c.GRPCDiff() - require.NoError(t, err) - require.Equal(t, expectedDiff, actual) - - // now we should get no diff again (with 1 completed job) - expectedNoDiff.Metrics.CompletedJobs = 1 - actual, err = c.GRPCDiff() - require.NoError(t, err) - require.Equal(t, expectedNoDiff, actual) - - // let's add a different trace and get it back in diff - traceID2 := "traceID2" - sr2 := toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID2, + { + name: "add second job to finish the first shard and get one result", + pipelineResponse: toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "350", + RootServiceName: "root-350", + StartTimeUnixNano: uint64(350 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + InspectedBytes: 2, + }, + }, 200, 0), // shard 0, + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "550", + RootServiceName: "root-550", + StartTimeUnixNano: uint64(550 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ // metadata is incrementing + CompletedJobs: 2, + InspectedTraces: 2, + InspectedBytes: 4, + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, }, }, - Metrics: &tempopb.SearchMetrics{}, - }, 200) - expectedDiff2 := &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID2, - RootServiceName: search.RootSpanNotYetReceivedText, + { + name: "update response results", + pipelineResponse: toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "550", + RootServiceName: "root-550", + RootTraceName: "root-550", + 
StartTimeUnixNano: uint64(550 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + InspectedBytes: 2, + }, + }, 200, 1), // complete shard 1 + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { // included b/c updated + TraceID: "550", + RootServiceName: "root-550", + RootTraceName: "root-550", + StartTimeUnixNano: uint64(550 * time.Second), + }, + { // included b/c second shard is done + TraceID: "450", + RootServiceName: "root-450", + StartTimeUnixNano: uint64(450 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ // metadata is incrementing + CompletedJobs: 3, + InspectedTraces: 3, + InspectedBytes: 6, + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, }, }, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 2, // we will have 2 completed jobs at this point + { + name: "skip a shard and see no change", + pipelineResponse: toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "50", + RootServiceName: "root-50", + StartTimeUnixNano: uint64(50 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + InspectedBytes: 2, + }, + }, 200, 3), // complete shard 3, + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, // no traces b/c we skipped shard 2 and we can't include results from 3 until 2 is done + Metrics: &tempopb.SearchMetrics{ // metadata is incrementing + CompletedJobs: 4, + InspectedTraces: 4, + InspectedBytes: 8, + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, + }, + }, + { + name: "fill in shard 2 and see results", + pipelineResponse: toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + InspectedBytes: 2, + }, + }, 200, 2), // complete shard 2, + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "350", + RootServiceName: "root-350", + StartTimeUnixNano: uint64(350 * time.Second), + }, + }, + Metrics: &tempopb.SearchMetrics{ // metadata is incrementing + CompletedJobs: 5, + InspectedTraces: 5, + InspectedBytes: 10, + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, + }, + }, + { + name: "complete all shards which dumps all results", + pipelineResponse: toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + InspectedBytes: 2, + }, + }, 200, 4), // complete shard 4, + expected: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "50", + RootServiceName: "root-50", + StartTimeUnixNano: uint64(50 * time.Second), + }, + }, // 50 is BEFORE the earliest trace shard, but it's still returned b/c at this point we have completed all jobs + Metrics: &tempopb.SearchMetrics{ // metadata is incrementing + CompletedJobs: 6, + InspectedTraces: 6, + InspectedBytes: 12, + TotalBlocks: 5, + TotalJobs: 6, + TotalBlockBytes: 15, + }, + }, }, } - err = c.AddResponse(sr2) - require.NoError(t, err) - - actual, err = c.GRPCDiff() - require.NoError(t, err) - require.Equal(t, expectedDiff2, actual) -} - -type pipelineResponse struct { - r *http.Response -} - -func (p *pipelineResponse) HTTPResponse() *http.Response { - return p.r -} - -func (p *pipelineResponse) RequestData() any { - return nil -} - -func toHTTPResponse(t *testing.T, pb proto.Message, statusCode int) PipelineResponse { - var body string + 
// apply tests one at a time to the combiner and check expected results + combiner := NewTypedSearch(5) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if tc.pipelineResponse != nil { + err := combiner.AddResponse(tc.pipelineResponse) + require.NoError(t, err) + } - if pb != nil { - var err error - m := jsonpb.Marshaler{} - body, err = m.MarshalToString(pb) - require.NoError(t, err) + resp, err := combiner.GRPCDiff() + require.NoError(t, err) + require.Equal(t, tc.expected, resp) + }) } - - return &pipelineResponse{&http.Response{ - Body: io.NopCloser(strings.NewReader(body)), - StatusCode: statusCode, - }} -} - -func fromHTTPResponse(t *testing.T, r *http.Response, pb proto.Message) { - err := jsonpb.Unmarshal(r.Body, pb) - require.NoError(t, err) } -func TestCombinerDiffs(t *testing.T) { - combiner := NewTypedSearch(100) - - // first request should be empty - resp, err := combiner.GRPCDiff() - require.NoError(t, err) - require.Equal(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{}, - Metrics: &tempopb.SearchMetrics{}, - }, resp) - - err = combiner.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "1234", - RootServiceName: "root", +func TestCompletionTracker(t *testing.T) { + tcs := []struct { + name string + add []int // -1 means send shards + shards []SearchShards + exp uint32 + }{ + // shards only + { + name: "shards only", + add: []int{-1}, + shards: []SearchShards{ + { + TotalJobs: 1, + CompletedThroughSeconds: 100, + }, }, + exp: 0, }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 1, - InspectedBytes: 2, - }, - }, 200)) - require.NoError(t, err) - - // now we should get the same metadata as above - resp, err = combiner.GRPCDiff() - require.NoError(t, err) - require.Equal(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "1234", - RootServiceName: "root", + // indexes only + { + name: "indexes only", + add: []int{1, 0, 1, 3, 2, 0, 1, 1}, + shards: []SearchShards{ + { + TotalJobs: 1, + CompletedThroughSeconds: 100, + }, }, + exp: 0, }, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 1, - InspectedTraces: 1, - InspectedBytes: 2, - }, - }, resp) - - // metrics, but the trace hasn't change - resp, err = combiner.GRPCDiff() - require.NoError(t, err) - require.Equal(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{}, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 1, - InspectedTraces: 1, - InspectedBytes: 2, - }, - }, resp) - - // new traces - err = combiner.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "5678", - RootServiceName: "root", - StartTimeUnixNano: 1, // forces order - }, - { - TraceID: "9011", - RootServiceName: "root", - StartTimeUnixNano: 2, + // first shard complete, shards first + { + name: "first shard complete, shards first", + add: []int{-1, 0}, + shards: []SearchShards{ + { + TotalJobs: 1, + CompletedThroughSeconds: 100, + }, }, + exp: 100, }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 1, - InspectedBytes: 2, + // first shard complete, index first + { + name: "first shard complete, index first", + add: []int{0, -1}, + shards: []SearchShards{ + { + TotalJobs: 1, + CompletedThroughSeconds: 100, + }, + }, + exp: 100, }, - }, 200)) - require.NoError(t, err) - - resp, err = combiner.GRPCDiff() - require.NoError(t, err) - require.Equal(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "9011", 
- RootServiceName: "root", - StartTimeUnixNano: 2, + // shards received at various times + { + name: "shards received at various times", + add: []int{-1, 0, 0, 1, 1}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, }, - { - TraceID: "5678", - RootServiceName: "root", - StartTimeUnixNano: 1, + exp: 200, + }, + { + name: "shards received at various times", + add: []int{0, -1, 0, 1, 1}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, }, + exp: 200, }, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 2, - InspectedTraces: 2, - InspectedBytes: 4, + { + name: "shards received at various times", + add: []int{0, 0, 1, -1, 1}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, + }, + exp: 200, }, - }, resp) - - // write over existing trace - err = combiner.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "1234", - DurationMs: 100, + { + name: "shards received at various times", + add: []int{0, 0, 1, 1, -1}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, }, + exp: 200, }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 1, - InspectedBytes: 2, + // bad data received + { + name: "bad data received last", + add: []int{-1, 0, 0, 2}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, + }, + exp: 100, }, - }, 200)) - require.NoError(t, err) - - resp, err = combiner.GRPCDiff() - require.NoError(t, err) - require.Equal(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "1234", - RootServiceName: "root", - DurationMs: 100, + { + name: "bad data immediately after shards", + add: []int{-1, 2, 0, 0}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, }, + exp: 100, }, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 3, - InspectedTraces: 3, - InspectedBytes: 6, + { + name: "bad data immediately before shards", + add: []int{0, 0, 2, -1}, + shards: []SearchShards{ + { + TotalJobs: 2, + CompletedThroughSeconds: 100, + }, + { + TotalJobs: 2, + CompletedThroughSeconds: 200, + }, + }, + exp: 100, }, - }, resp) -} + } -func TestSearchCombinerDoesNotRace(t *testing.T) { - end := make(chan struct{}) - concurrent := func(f func()) { - for { - select { - case <-end: - return - default: - f() + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + tracker := &ShardCompletionTracker{} + + ct := uint32(0) + for _, sc := range tc.add { + if sc == -1 { + ct = tracker.addShards(tc.shards) + continue + } + + ct = tracker.addShardIdx(sc) } - } + require.Equal(t, int(tc.exp), int(ct)) + }) } +} - traceID := "1234" - combiner := NewTypedSearch(10) - i := 0 - go concurrent(func() { - i++ - resp := toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID, - StartTimeUnixNano: math.MaxUint64 - uint64(i), - DurationMs: uint32(i), - SpanSets: []*tempopb.SpanSet{{ - Matched: uint32(i), - }}, - }, - }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 1, - InspectedBytes: 1, - TotalBlocks: 1, - TotalJobs: 1, - CompletedJobs: 1, - }, 
- }, 200) - _ = combiner.AddResponse(resp) - }) +func toHTTPResponseWithResponseData(t *testing.T, pb proto.Message, statusCode int, responseData any) PipelineResponse { + var body string - go concurrent(func() { - _, _ = combiner.GRPCFinal() - }) + if pb != nil { + var err error + m := jsonpb.Marshaler{} + body, err = m.MarshalToString(pb) + require.NoError(t, err) + } - go concurrent(func() { - _, _ = combiner.HTTPFinal() - }) + return &testPipelineResponse{ + responseData: responseData, + r: &http.Response{ + Body: io.NopCloser(strings.NewReader(body)), + StatusCode: statusCode, + }, + } +} + +func toHTTPResponse(t *testing.T, pb proto.Message, statusCode int) PipelineResponse { + return toHTTPResponseWithResponseData(t, pb, statusCode, nil) +} - time.Sleep(100 * time.Millisecond) - close(end) - // Wait for go funcs to quit before - // exiting and cleaning up - time.Sleep(2 * time.Second) +func fromHTTPResponse(t *testing.T, r *http.Response, pb proto.Message) { + err := jsonpb.Unmarshal(r.Body, pb) + require.NoError(t, err) } diff --git a/modules/frontend/combiner/trace_by_id_test.go b/modules/frontend/combiner/trace_by_id_test.go index ac83e8ead6e..5deaaf598f8 100644 --- a/modules/frontend/combiner/trace_by_id_test.go +++ b/modules/frontend/combiner/trace_by_id_test.go @@ -43,7 +43,7 @@ func TestTraceByIDShouldQuit(t *testing.T) { // unparseable body should not quit, but should return an error c = NewTraceByID(0, api.HeaderAcceptJSON) - err = c.AddResponse(&pipelineResponse{&http.Response{Body: io.NopCloser(strings.NewReader("foo")), StatusCode: 200}}) + err = c.AddResponse(&testPipelineResponse{r: &http.Response{Body: io.NopCloser(strings.NewReader("foo")), StatusCode: 200}}) require.Error(t, err) should = c.ShouldQuit() require.False(t, should) @@ -101,7 +101,7 @@ func toHTTPProtoResponse(t *testing.T, pb proto.Message, statusCode int) Pipelin require.NoError(t, err) } - return &pipelineResponse{&http.Response{ + return &testPipelineResponse{r: &http.Response{ Body: io.NopCloser(bytes.NewReader(body)), StatusCode: statusCode, }} diff --git a/modules/frontend/combiner/trace_by_id_v2_test.go b/modules/frontend/combiner/trace_by_id_v2_test.go index 7df3095d443..6d6d6dc933f 100644 --- a/modules/frontend/combiner/trace_by_id_v2_test.go +++ b/modules/frontend/combiner/trace_by_id_v2_test.go @@ -27,6 +27,10 @@ func (m MockResponse) RequestData() any { return nil } +func (m MockResponse) IsMetadata() bool { + return false +} + func TestNewTraceByIdV2ReturnsAPartialTrace(t *testing.T) { traceResponse := &tempopb.TraceByIDResponse{ Trace: test.MakeTrace(2, []byte{0x01, 0x02}), diff --git a/modules/frontend/pipeline/async_handler_multitenant.go b/modules/frontend/pipeline/async_handler_multitenant.go index 984787d7113..c8c2f6bd4d2 100644 --- a/modules/frontend/pipeline/async_handler_multitenant.go +++ b/modules/frontend/pipeline/async_handler_multitenant.go @@ -60,13 +60,15 @@ func (t *tenantRoundTripper) RoundTrip(req Request) (Responses[combiner.Pipeline // requestForTenant makes a copy of request and injects the tenant id into context and Header. 
// this allows us to keep all multi-tenant logic in query frontend and keep other components single tenant
 func requestForTenant(req Request, tenant string) Request {
-	r := req.HTTPRequest()
+	clonedR := req.Clone()
+
+	r := clonedR.HTTPRequest()
+	r.Header.Set(user.OrgIDHeaderName, tenant)
+
 	ctx := r.Context()
+	clonedR.WithContext(user.InjectOrgID(ctx, tenant))
 
-	ctx = user.InjectOrgID(ctx, tenant)
-	rCopy := r.Clone(ctx)
-	rCopy.Header.Set(user.OrgIDHeaderName, tenant)
-	return NewHTTPRequest(rCopy)
+	return clonedR
 }
 
 type unsupportedRoundTripper struct {
diff --git a/modules/frontend/pipeline/pipeline.go b/modules/frontend/pipeline/pipeline.go
index 920cd4eb203..a2e18cb3ae1 100644
--- a/modules/frontend/pipeline/pipeline.go
+++ b/modules/frontend/pipeline/pipeline.go
@@ -14,6 +14,7 @@ type Request interface {
 	HTTPRequest() *http.Request
 	Context() context.Context
 	WithContext(context.Context)
+	Clone() Request
 
 	Weight() int
 	SetWeight(int)
@@ -50,6 +51,14 @@ func (r HTTPRequest) Context() context.Context {
 	return r.req.Context()
 }
 
+func (r *HTTPRequest) Clone() Request {
+	return &HTTPRequest{
+		req:          r.req.Clone(r.req.Context()),
+		cacheKey:     r.cacheKey,
+		responseData: r.responseData,
+	}
+}
+
 func (r *HTTPRequest) WithContext(ctx context.Context) {
 	r.req = r.req.WithContext(ctx)
 }
diff --git a/modules/frontend/pipeline/responses.go b/modules/frontend/pipeline/responses.go
index 8aabb3f67c1..6f9b7ac7513 100644
--- a/modules/frontend/pipeline/responses.go
+++ b/modules/frontend/pipeline/responses.go
@@ -30,11 +30,22 @@ func (p pipelineResponse) RequestData() any {
 	return p.requestData
 }
 
+func (p pipelineResponse) IsMetadata() bool {
+	return false
+}
+
 // syncResponse is a single http.Response that implements the Responses[*http.Response] interface.
 type syncResponse struct {
 	r combiner.PipelineResponse
 }
 
+// NewAsyncResponse creates a new AsyncResponse that wraps a single combiner.PipelineResponse.
+func NewAsyncResponse(r combiner.PipelineResponse) Responses[combiner.PipelineResponse] {
+	return syncResponse{
+		r: r,
+	}
+}
+
 // NewHTTPToAsyncResponse creates a new AsyncResponse that wraps a single http.Response.
func NewHTTPToAsyncResponse(r *http.Response) Responses[combiner.PipelineResponse] { return syncResponse{ diff --git a/modules/frontend/search_handlers_test.go b/modules/frontend/search_handlers_test.go index 05766eb78ba..43df9cfe2af 100644 --- a/modules/frontend/search_handlers_test.go +++ b/modules/frontend/search_handlers_test.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "math" "math/rand" "net/http" "net/http/httptest" @@ -175,8 +176,9 @@ func runnerRequests(t *testing.T, f *QueryFrontend) { expectedStatusCode: 200, expectedResponse: &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{{ - TraceID: "1", - RootServiceName: search.RootSpanNotYetReceivedText, + TraceID: "1", + RootServiceName: search.RootSpanNotYetReceivedText, + StartTimeUnixNano: math.MaxUint64, }}, Metrics: &tempopb.SearchMetrics{ InspectedTraces: 4, @@ -212,8 +214,9 @@ func runnerRequests(t *testing.T, f *QueryFrontend) { expectedStatusCode: 200, expectedResponse: &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{{ - TraceID: "1", - RootServiceName: search.RootSpanNotYetReceivedText, + TraceID: "1", + RootServiceName: search.RootSpanNotYetReceivedText, + StartTimeUnixNano: math.MaxUint64, }}, Metrics: &tempopb.SearchMetrics{ InspectedTraces: 8, @@ -304,7 +307,8 @@ func TestSearchLimitHonored(t *testing.T) { return &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{ { - TraceID: util.TraceIDToHexString(test.ValidTraceID(nil)), + TraceID: util.TraceIDToHexString(test.ValidTraceID(nil)), + StartTimeUnixNano: math.MaxUint64, // forces GRPCDiff in the search combiner to return this trace b/c it's always after CompletedThroughSeconds }, }, Metrics: &tempopb.SearchMetrics{ @@ -387,7 +391,6 @@ func TestSearchLimitHonored(t *testing.T) { ctx := user.InjectOrgID(httpReq.Context(), tenant) httpReq = httpReq.WithContext(ctx) - httpResp := httptest.NewRecorder() f.SearchHandler.ServeHTTP(httpResp, httpReq) @@ -403,7 +406,7 @@ func TestSearchLimitHonored(t *testing.T) { } // grpc - combiner := traceql.NewMetadataCombiner() + combiner := traceql.NewMetadataCombiner(100) err = f.streamingSearch(tc.request, newMockStreamingServer(tenant, func(i int, sr *tempopb.SearchResponse) { // combine for _, t := range sr.Traces { @@ -414,7 +417,7 @@ func TestSearchLimitHonored(t *testing.T) { require.Equal(t, status.Error(codes.InvalidArgument, "adjust limit: limit 20 exceeds max limit 15"), err) } else { require.NoError(t, err) - require.Equal(t, combiner.Count(), tc.expectedTraces) + require.Equal(t, tc.expectedTraces, combiner.Count()) } }) } @@ -718,7 +721,8 @@ func frontendWithSettings(t require.TestingT, next pipeline.RoundTripper, rdr te return &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{ { - TraceID: "1", + TraceID: "1", + StartTimeUnixNano: math.MaxUint64, // forces GRPCDiff in the search combiner to return this trace b/c it's always after CompletedThroughSeconds }, }, Metrics: &tempopb.SearchMetrics{ diff --git a/modules/frontend/search_sharder.go b/modules/frontend/search_sharder.go index a8e1df35ec8..16fd6f48fcc 100644 --- a/modules/frontend/search_sharder.go +++ b/modules/frontend/search_sharder.go @@ -3,10 +3,11 @@ package frontend import ( "context" "fmt" + "math" + "sort" "time" "github.com/go-kit/log" //nolint:all deprecated - "github.com/gogo/protobuf/jsonpb" "github.com/grafana/dskit/user" "github.com/segmentio/fasthash/fnv1a" @@ -23,6 +24,8 @@ import ( const ( defaultTargetBytesPerRequest = 100 * 1024 * 1024 defaultConcurrentRequests = 1000 + // every search is 
broken into the same number of search shards. these shards are used to stream partial results back that are guaranteed to be the most recent results
+	maxSearchShards = 200
 )
 
 type SearchSharderConfig struct {
@@ -90,76 +93,53 @@ func (s asyncSearchSharder) RoundTrip(pipelineRequest pipeline.Request) (pipelin
 		return pipeline.NewBadRequest(fmt.Errorf("range specified by start and end exceeds %s. received start=%d end=%d", maxDuration, searchReq.Start, searchReq.End)), nil
 	}
 
-	// buffer of shards+1 allows us to insert ingestReq and metrics
+	// buffer allows us to insert any ingester reqs without blocking
 	reqCh := make(chan pipeline.Request, s.cfg.IngesterShards+1)
 
 	// build request to search ingesters based on query_ingesters_until config and time range
 	// pass subCtx in requests so we can cancel and exit early
-	err = s.ingesterRequests(ctx, tenantID, pipelineRequest, *searchReq, reqCh)
+	jobMetrics, err := s.ingesterRequests(ctx, tenantID, pipelineRequest, *searchReq, reqCh)
 	if err != nil {
 		return nil, err
 	}
 
-	// Check the number of requests that were were written to the request channel
-	// before we start reading them.
-	ingesterJobs := len(reqCh)
-
 	// pass subCtx in requests so we can cancel and exit early
-	totalJobs, totalBlocks, totalBlockBytes := s.backendRequests(ctx, tenantID, pipelineRequest, searchReq, reqCh, func(err error) {
+	jobMetrics = s.backendRequests(ctx, tenantID, pipelineRequest, searchReq, jobMetrics, reqCh, func(err error) {
 		// todo: actually find a way to return this error to the user
 		s.logger.Log("msg", "search: failed to build backend requests", "err", err)
 	})
-	totalJobs += ingesterJobs
-
-	// send a job to communicate the search metrics. this is consumed by the combiner to calculate totalblocks/bytes/jobs
-	var jobMetricsResponse pipeline.Responses[combiner.PipelineResponse]
-	if totalJobs > 0 {
-		resp := &tempopb.SearchResponse{
-			Metrics: &tempopb.SearchMetrics{
-				TotalBlocks:     uint32(totalBlocks),
-				TotalBlockBytes: totalBlockBytes,
-				TotalJobs:       uint32(totalJobs),
-			},
-		}
-
-		m := jsonpb.Marshaler{}
-		body, err := m.MarshalToString(resp)
-		if err != nil {
-			return nil, fmt.Errorf("failed to marshal search metrics: %w", err)
-		}
-
-		jobMetricsResponse = pipeline.NewSuccessfulResponse(body)
-	}
 
 	// execute requests
-	return pipeline.NewAsyncSharderChan(ctx, s.cfg.ConcurrentRequests, reqCh, jobMetricsResponse, s.next), nil
+	return pipeline.NewAsyncSharderChan(ctx, s.cfg.ConcurrentRequests, reqCh, pipeline.NewAsyncResponse(jobMetrics), s.next), nil
 }
 
-// blockMetas returns all relevant blockMetas given a start/end
-func (s *asyncSearchSharder) blockMetas(start, end int64, tenantID string) []*backend.BlockMeta {
+func (s *asyncSearchSharder) blockMetas(start, end uint32, tenantID string) []*backend.BlockMeta {
 	// reduce metas to those in the requested range
-	allMetas := s.reader.BlockMetas(tenantID)
-	metas := make([]*backend.BlockMeta, 0, len(allMetas)/50) // divide by 50 for luck
-	for _, m := range allMetas {
-		if m.StartTime.Unix() <= end &&
-			m.EndTime.Unix() >= start &&
+	allBlocks := s.reader.BlockMetas(tenantID)
+	blocks := make([]*backend.BlockMeta, 0, len(allBlocks)/50) // divide by 50 for luck
+	for _, m := range allBlocks {
+		if m.StartTime.Unix() <= int64(end) &&
+			m.EndTime.Unix() >= int64(start) &&
 			m.ReplicationFactor == backend.DefaultReplicationFactor { // This check skips generator blocks (RF=1)
-			metas = append(metas, m)
+			blocks = append(blocks, m)
 		}
 	}
 
-	return metas
+	// search backwards in time
+	sort.Slice(blocks, func(i, j int) bool {
+		return 
blocks[i].EndTime.After(blocks[j].EndTime) + }) + + return blocks } // backendRequest builds backend requests to search backend blocks. backendRequest takes ownership of reqCh and closes it. // it returns 3 int values: totalBlocks, totalBlockBytes, and estimated jobs -func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, reqCh chan<- pipeline.Request, errFn func(error)) (totalJobs, totalBlocks int, totalBlockBytes uint64) { - var blocks []*backend.BlockMeta - +func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, resp *combiner.SearchJobResponse, reqCh chan<- pipeline.Request, errFn func(error)) *combiner.SearchJobResponse { // request without start or end, search only in ingester if searchReq.Start == 0 || searchReq.End == 0 { close(reqCh) - return + return resp } // calculate duration (start and end) to search the backend blocks @@ -168,48 +148,58 @@ func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID strin // no need to search backend if start == end { close(reqCh) - return + return resp } - // get block metadata of blocks in start, end duration - blocks = s.blockMetas(int64(start), int64(end), tenantID) - - targetBytesPerRequest := s.cfg.TargetBytesPerRequest + blocks := s.blockMetas(start, end, tenantID) // calculate metrics to return to the caller - totalBlocks = len(blocks) - for _, b := range blocks { - p := pagesPerRequest(b, targetBytesPerRequest) + resp.TotalBlocks = len(blocks) - totalJobs += int(b.TotalRecords) / p - if int(b.TotalRecords)%p != 0 { - totalJobs++ - } - totalBlockBytes += b.Size_ - } + blockIter := backendJobsFunc(blocks, s.cfg.TargetBytesPerRequest, maxSearchShards, searchReq.End) + blockIter(func(jobs int, sz uint64, completedThroughTime uint32) { + resp.TotalJobs += jobs + resp.TotalBytes += sz + + resp.Shards = append(resp.Shards, combiner.SearchShards{ + TotalJobs: uint32(jobs), + CompletedThroughSeconds: completedThroughTime, + }) + }, nil) go func() { - buildBackendRequests(ctx, tenantID, parent, searchReq, blocks, targetBytesPerRequest, reqCh, errFn) + buildBackendRequests(ctx, tenantID, parent, searchReq, blockIter, reqCh, errFn) }() - return + return resp } // ingesterRequest returns a new start and end time range for the backend as well as an http request // that covers the ingesters. If nil is returned for the http.Request then there is no ingesters query. // since this function modifies searchReq.Start and End we are taking a value instead of a pointer to prevent it from // unexpectedly changing the passed searchReq. 
-func (s *asyncSearchSharder) ingesterRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.SearchRequest, reqCh chan pipeline.Request) error { +func (s *asyncSearchSharder) ingesterRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.SearchRequest, reqCh chan pipeline.Request) (*combiner.SearchJobResponse, error) { + resp := &combiner.SearchJobResponse{ + Shards: make([]combiner.SearchShards, 0, maxSearchShards+1), // +1 for the ingester shard + } + // request without start or end, search only in ingester if searchReq.Start == 0 || searchReq.End == 0 { - return buildIngesterRequest(ctx, tenantID, parent, &searchReq, reqCh) + // one shard that covers all time + resp.TotalJobs = 1 + resp.Shards = append(resp.Shards, combiner.SearchShards{ + TotalJobs: 1, + CompletedThroughSeconds: 1, + }) + + return resp, buildIngesterRequestChan(ctx, tenantID, parent, &searchReq, reqCh) } ingesterUntil := uint32(time.Now().Add(-s.cfg.QueryIngestersUntil).Unix()) // if there's no overlap between the query and ingester range just return nil if searchReq.End < ingesterUntil { - return nil + return resp, nil } ingesterStart := searchReq.Start @@ -222,7 +212,7 @@ func (s *asyncSearchSharder) ingesterRequests(ctx context.Context, tenantID stri // if ingester start == ingester end then we don't need to query it if ingesterStart == ingesterEnd { - return nil + return resp, nil } searchReq.Start = ingesterStart @@ -257,13 +247,22 @@ func (s *asyncSearchSharder) ingesterRequests(ctx context.Context, tenantID stri subReq.Start = shardStart subReq.End = shardEnd - err := buildIngesterRequest(ctx, tenantID, parent, &subReq, reqCh) + err := buildIngesterRequestChan(ctx, tenantID, parent, &subReq, reqCh) if err != nil { - return err + return nil, err } } - return nil + // add one shard that covers no time at all. this will force the combiner to wait + // for ingester requests to complete before moving on to the backend requests + ingesterJobs := len(reqCh) + resp.TotalJobs = ingesterJobs + resp.Shards = append(resp.Shards, combiner.SearchShards{ + TotalJobs: uint32(ingesterJobs), + CompletedThroughSeconds: math.MaxUint32, + }) + + return resp, nil } // maxDuration returns the max search duration allowed for this tenant. @@ -297,59 +296,53 @@ func backendRange(start, end uint32, queryBackendAfter time.Duration) (uint32, u // buildBackendRequests returns a slice of requests that cover all blocks in the store // that are covered by start/end. 
-func buildBackendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, metas []*backend.BlockMeta, bytesPerRequest int, reqCh chan<- pipeline.Request, errFn func(error)) { +func buildBackendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, blockIter func(shardIterFn, jobIterFn), reqCh chan<- pipeline.Request, errFn func(error)) { defer close(reqCh) queryHash := hashForSearchRequest(searchReq) colsToJSON := api.NewDedicatedColumnsToJSON() - for _, m := range metas { - pages := pagesPerRequest(m, bytesPerRequest) - if pages == 0 { - continue - } - + blockIter(nil, func(m *backend.BlockMeta, shard, startPage, pages int) { blockID := m.BlockID.String() - for startPage := 0; startPage < int(m.TotalRecords); startPage += pages { - subR := parent.HTTPRequest().Clone(ctx) + subR := parent.HTTPRequest().Clone(ctx) - dedColsJSON, err := colsToJSON.JSONForDedicatedColumns(m.DedicatedColumns) - if err != nil { - errFn(fmt.Errorf("failed to convert dedicated columns. block: %s tempopb: %w", blockID, err)) - continue - } - - subR, err = api.BuildSearchBlockRequest(subR, &tempopb.SearchBlockRequest{ - BlockID: blockID, - StartPage: uint32(startPage), - PagesToSearch: uint32(pages), - Encoding: m.Encoding.String(), - IndexPageSize: m.IndexPageSize, - TotalRecords: m.TotalRecords, - DataEncoding: m.DataEncoding, - Version: m.Version, - Size_: m.Size_, - FooterSize: m.FooterSize, - // DedicatedColumns: dc, for perf reason we pass dedicated columns json in directly to not have to realloc object -> proto -> json - }, dedColsJSON) - if err != nil { - errFn(fmt.Errorf("failed to build search block request. block: %s tempopb: %w", blockID, err)) - continue - } + dedColsJSON, err := colsToJSON.JSONForDedicatedColumns(m.DedicatedColumns) + if err != nil { + errFn(fmt.Errorf("failed to convert dedicated columns. block: %s tempopb: %w", blockID, err)) + return + } - prepareRequestForQueriers(subR, tenantID) - key := searchJobCacheKey(tenantID, queryHash, int64(searchReq.Start), int64(searchReq.End), m, startPage, pages) - pipelineR := parent.CloneFromHTTPRequest(subR) - pipelineR.SetCacheKey(key) + subR, err = api.BuildSearchBlockRequest(subR, &tempopb.SearchBlockRequest{ + BlockID: blockID, + StartPage: uint32(startPage), + PagesToSearch: uint32(pages), + Encoding: m.Encoding.String(), + IndexPageSize: m.IndexPageSize, + TotalRecords: m.TotalRecords, + DataEncoding: m.DataEncoding, + Version: m.Version, + Size_: m.Size_, + FooterSize: m.FooterSize, + // DedicatedColumns: dc, for perf reason we pass dedicated columns json in directly to not have to realloc object -> proto -> json + }, dedColsJSON) + if err != nil { + errFn(fmt.Errorf("failed to build search block request. block: %s tempopb: %w", blockID, err)) + return + } - select { - case reqCh <- pipelineR: - case <-ctx.Done(): - // ignore the error if there is one. it will be handled elsewhere - return - } + prepareRequestForQueriers(subR, tenantID) + key := searchJobCacheKey(tenantID, queryHash, int64(searchReq.Start), int64(searchReq.End), m, startPage, pages) + pipelineR := parent.CloneFromHTTPRequest(subR) + pipelineR.SetCacheKey(key) + pipelineR.SetResponseData(shard) + + select { + case reqCh <- pipelineR: + case <-ctx.Done(): + // ignore the error if there is one. it will be handled elsewhere + return } - } + }) } // hashForSearchRequest returns a uint64 hash of the query. if the query is invalid it returns a 0 hash. 
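A note on how these pieces fit together: each backend job is tagged with its shard index via SetResponseData, and the combiner feeds those indexes into the ShardCompletionTracker added earlier in this patch. Because shards are ordered newest-first, the tracker's watermark only advances once every job in every earlier (more recent) shard has completed. A minimal sketch of that invariant (illustrative only, not part of this patch; since addShards/addShardIdx are unexported it assumes it lives in the combiner package):

func exampleWatermark() uint32 {
	t := &ShardCompletionTracker{}

	// shards are ordered newest-first: shard 0 covers everything newer than
	// 300s, shard 1 everything newer than 150s
	t.addShards([]SearchShards{
		{TotalJobs: 2, CompletedThroughSeconds: 300},
		{TotalJobs: 1, CompletedThroughSeconds: 150},
	})

	t.addShardIdx(1)        // shard 1 finishes first, but shard 0 is still open: watermark stays 0
	t.addShardIdx(0)        // first of shard 0's two jobs: still 0
	return t.addShardIdx(0) // shard 0 completes: the watermark rolls through both shards to 150
}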
@@ -396,7 +389,7 @@ func pagesPerRequest(m *backend.BlockMeta, bytesPerRequest int) int {
 	return pagesPerQuery
 }
 
-func buildIngesterRequest(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, reqCh chan pipeline.Request) error {
+func buildIngesterRequestChan(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, reqCh chan pipeline.Request) error {
 	subR := parent.HTTPRequest().Clone(ctx)
 	subR, err := api.BuildSearchRequest(subR, searchReq)
 	if err != nil {
@@ -404,6 +397,78 @@ func buildIngesterRequest(ctx context.Context, tenantID string, parent pipeline.
 	}
 
 	prepareRequestForQueriers(subR, tenantID)
-	reqCh <- parent.CloneFromHTTPRequest(subR)
+
+	pipelineR := parent.CloneFromHTTPRequest(subR)
+	pipelineR.SetResponseData(0) // ingester requests are always shard 0
+
+	reqCh <- pipelineR
 	return nil
 }
+
+type shardIterFn func(jobs int, sz uint64, completedThroughTime uint32)
+type jobIterFn func(m *backend.BlockMeta, shard, startPage, pages int)
+
+// backendJobsFunc provides an iter func with 2 callbacks designed to be used once to calculate job and shard metrics and a second time
+// to generate actual jobs.
+func backendJobsFunc(blocks []*backend.BlockMeta, targetBytesPerRequest int, maxShards int, end uint32) func(shardIterFn, jobIterFn) {
+	blocksPerShard := len(blocks) / maxShards
+
+	// if we have fewer blocks than shards then every shard is one block
+	if blocksPerShard == 0 {
+		blocksPerShard = 1
+	}
+
+	return func(shardIterCallback shardIterFn, jobIterCallback jobIterFn) {
+		currentShard := 0
+		jobsInShard := 0
+		bytesInShard := uint64(0)
+		blocksInShard := 0
+
+		for _, b := range blocks {
+			pages := pagesPerRequest(b, targetBytesPerRequest)
+			jobsInBlock := 0
+
+			if pages == 0 {
+				continue
+			}
+
+			// if jobIterCallback is nil we can skip the loop and directly calc the jobsInBlock
+			if jobIterCallback == nil {
+				jobsInBlock = int(b.TotalRecords) / pages
+				if int(b.TotalRecords)%pages != 0 {
+					jobsInBlock++
+				}
+			} else {
+				for startPage := 0; startPage < int(b.TotalRecords); startPage += pages {
+					jobIterCallback(b, currentShard, startPage, pages)
+					jobsInBlock++
+				}
+			}
+
+			// do we need to roll to a new shard?
+			jobsInShard += jobsInBlock
+			bytesInShard += b.Size_
+			blocksInShard++
+
+			// -1 b/c we will likely add a final shard below
+			// end comparison b/c there's no point in ending a shard that can't release any results
+			if blocksInShard >= blocksPerShard && currentShard < maxShards-1 && b.EndTime.Unix() < int64(end) {
+				if shardIterCallback != nil {
+					shardIterCallback(jobsInShard, bytesInShard, uint32(b.EndTime.Unix()))
+				}
+				currentShard++
+
+				jobsInShard = 0
+				bytesInShard = 0
+				blocksInShard = 0
+			}
+		}
+
+		// final shard - note that we are overpacking the final shard due to the integer math as well as the limit of 200 shards total.
+		// this is the least impactful shard to place extra jobs in as it is searched last. if we make it here the chances of this being an exhaustive search
+		// are higher
+		if shardIterCallback != nil && jobsInShard > 0 {
+			shardIterCallback(jobsInShard, bytesInShard, 1) // final shard can cover all time.
we don't need to be precise + } + } +} diff --git a/modules/frontend/search_sharder_test.go b/modules/frontend/search_sharder_test.go index ed8eb3abe6f..3aeb2be7dca 100644 --- a/modules/frontend/search_sharder_test.go +++ b/modules/frontend/search_sharder_test.go @@ -1,13 +1,14 @@ package frontend import ( - "bytes" "context" "fmt" "io" + "math" "net/http" "net/http/httptest" "net/url" + "sort" "strconv" "strings" "testing" @@ -224,9 +225,10 @@ func TestBuildBackendRequests(t *testing.T) { ctx, cancelCause := context.WithCancelCause(context.Background()) reqCh := make(chan pipeline.Request) + iterFn := backendJobsFunc(tc.metas, tc.targetBytesPerRequest, maxSearchShards, math.MaxUint32) go func() { - buildBackendRequests(ctx, "test", pipeline.NewHTTPRequest(req), searchReq, tc.metas, tc.targetBytesPerRequest, reqCh, cancelCause) + buildBackendRequests(ctx, "test", pipeline.NewHTTPRequest(req), searchReq, iterFn, reqCh, cancelCause) }() actualURIs := []string{} @@ -317,10 +319,10 @@ func TestBackendRequests(t *testing.T) { ctx, cancelCause := context.WithCancelCause(context.Background()) pipelineRequest := pipeline.NewHTTPRequest(r) - jobs, blocks, blockBytes := s.backendRequests(ctx, "test", pipelineRequest, searchReq, reqCh, cancelCause) - require.Equal(t, tc.expectedJobs, jobs) - require.Equal(t, tc.expectedBlocks, blocks) - require.Equal(t, tc.expectedBlockBytes, blockBytes) + searchJobResponse := s.backendRequests(ctx, "test", pipelineRequest, searchReq, &combiner.SearchJobResponse{}, reqCh, cancelCause) + require.Equal(t, tc.expectedJobs, searchJobResponse.TotalJobs) + require.Equal(t, tc.expectedBlocks, searchJobResponse.TotalBlocks) + require.Equal(t, tc.expectedBlockBytes, searchJobResponse.TotalBytes) actualReqURIs := []string{} for r := range reqCh { @@ -494,13 +496,24 @@ func TestIngesterRequests(t *testing.T) { pr := pipeline.NewHTTPRequest(req) pr.SetWeight(2) - err = s.ingesterRequests(context.Background(), "test", pr, *searchReq, reqChan) + actualSearchResponse, err := s.ingesterRequests(context.Background(), "test", pr, *searchReq, reqChan) if tc.expectedError != nil { - assert.Equal(t, tc.expectedError, err) + require.Equal(t, tc.expectedError, err) continue } - assert.NoError(t, err) - assert.Equal(t, len(tc.expectedURI), len(reqChan)) + require.NoError(t, err) + require.Equal(t, len(tc.expectedURI), len(reqChan)) + require.Equal(t, len(tc.expectedURI), actualSearchResponse.TotalJobs) + if len(tc.expectedURI) > 0 { + require.Equal(t, len(tc.expectedURI), int(actualSearchResponse.Shards[0].TotalJobs)) + expectedCompletedThrough := math.MaxUint32 // normal ingester shard completes no time on purpose + if searchReq.Start == 0 && searchReq.End == 0 { // ingester only search completes all time on purpose + expectedCompletedThrough = 1 + } + require.Equal(t, expectedCompletedThrough, int(actualSearchResponse.Shards[0].CompletedThroughSeconds)) + } else { + require.Equal(t, 0, len(actualSearchResponse.Shards)) + } // drain the channel and check the URIs for _, expectedURI := range tc.expectedURI { @@ -680,29 +693,24 @@ func TestTotalJobsIncludesIngester(t *testing.T) { resps, err := testRT.RoundTrip(pipeline.NewHTTPRequest(req)) require.NoError(t, err) // find a response with total jobs > . 
this is the metadata response - var resp *tempopb.SearchResponse + + totalJobs := 0 for { res, done, err := resps.Next(context.Background()) - r := res.HTTPResponse() - require.NoError(t, err) - require.Equal(t, 200, r.StatusCode) - actualResp := &tempopb.SearchResponse{} - bytesResp, err := io.ReadAll(r.Body) - require.NoError(t, err) - err = jsonpb.Unmarshal(bytes.NewReader(bytesResp), actualResp) - require.NoError(t, err) + if res.IsMetadata() { + searchJobResponse := res.(*combiner.SearchJobResponse) + totalJobs += searchJobResponse.TotalJobs - if actualResp.Metrics.TotalJobs > 0 { - resp = actualResp break } + require.NoError(t, err) require.False(t, done) } - // 2 jobs for the meta + 1 for th ingester - assert.Equal(t, uint32(3), resp.Metrics.TotalJobs) + // 2 jobs for the meta + 1 for the ingester + assert.Equal(t, 3, totalJobs) } func TestSearchSharderRoundTripBadRequest(t *testing.T) { @@ -863,6 +871,126 @@ func TestHashTraceQLQuery(t *testing.T) { require.NotEqual(t, h1, h2) } +func TestBackendShards(t *testing.T) { + tcs := []struct { + name string + maxShards int + searchEnd uint32 + expected []combiner.SearchShards + }{ + { + name: "1 shard, puts all jobs in one shard", + maxShards: 1, + searchEnd: 50, + expected: []combiner.SearchShards{ + {TotalJobs: 8, CompletedThroughSeconds: 1}, + }, + }, + { + name: "2 shards, split evenly between", + maxShards: 2, + searchEnd: 50, + expected: []combiner.SearchShards{ + {TotalJobs: 4, CompletedThroughSeconds: 30}, + {TotalJobs: 4, CompletedThroughSeconds: 1}, + }, + }, + { + name: "3 shards, one for each block", + maxShards: 3, + searchEnd: 50, + expected: []combiner.SearchShards{ + {TotalJobs: 2, CompletedThroughSeconds: 40}, + {TotalJobs: 2, CompletedThroughSeconds: 30}, + {TotalJobs: 4, CompletedThroughSeconds: 1}, + }, + }, + { + name: "4 shards, one for each block", + maxShards: 4, + searchEnd: 50, + expected: []combiner.SearchShards{ + {TotalJobs: 2, CompletedThroughSeconds: 40}, + {TotalJobs: 2, CompletedThroughSeconds: 30}, + {TotalJobs: 2, CompletedThroughSeconds: 20}, + {TotalJobs: 2, CompletedThroughSeconds: 1}, + }, + }, + { + name: "5 shards, one for each block", + maxShards: 5, + searchEnd: 50, + expected: []combiner.SearchShards{ + {TotalJobs: 2, CompletedThroughSeconds: 40}, + {TotalJobs: 2, CompletedThroughSeconds: 30}, + {TotalJobs: 2, CompletedThroughSeconds: 20}, + {TotalJobs: 2, CompletedThroughSeconds: 10}, + }, + }, + { + name: "4 shards, search end forces 2 blocks in the first shard", + maxShards: 4, + searchEnd: 35, + expected: []combiner.SearchShards{ + {TotalJobs: 4, CompletedThroughSeconds: 30}, + {TotalJobs: 2, CompletedThroughSeconds: 20}, + {TotalJobs: 2, CompletedThroughSeconds: 10}, + }, + }, + { + name: "4 shards, search end forces 3 blocks in the first shard", + maxShards: 4, + searchEnd: 25, + expected: []combiner.SearchShards{ + {TotalJobs: 6, CompletedThroughSeconds: 20}, + {TotalJobs: 2, CompletedThroughSeconds: 10}, + }, + }, + { + name: "2 shards, search end forces 2 blocks in the first shard", + maxShards: 2, + searchEnd: 35, + expected: []combiner.SearchShards{ + {TotalJobs: 4, CompletedThroughSeconds: 30}, + {TotalJobs: 4, CompletedThroughSeconds: 1}, + }, + }, + } + + // create 4 metas with 2 records each for all the above test cases to use. 
8 jobs total + metas := make([]*backend.BlockMeta, 0, 4) + for i := 0; i < 4; i++ { + metas = append(metas, &backend.BlockMeta{ + StartTime: time.Unix(int64(i*10), 0), // block 0 starts at 0 + EndTime: time.Unix(int64(i*10)+10, 0), // block 0 ends a 10 + Size_: defaultTargetBytesPerRequest * 2, // 2 jobs per block + TotalRecords: 2, + BlockID: backend.MustParse("00000000-0000-0000-0000-000000000000"), + }) + } + + // sort + sort.Slice(metas, func(i, j int) bool { + return metas[i].EndTime.After(metas[j].EndTime) + }) + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + fn := backendJobsFunc(metas, defaultTargetBytesPerRequest, tc.maxShards, tc.searchEnd) + actualShards := []combiner.SearchShards{} + + fn(func(jobs int, sz uint64, completedThroughTime uint32) { + actualShards = append(actualShards, combiner.SearchShards{ + TotalJobs: uint32(jobs), + CompletedThroughSeconds: completedThroughTime, + }) + }, nil) + + assert.Equal(t, tc.expected, actualShards) + }) + } +} + func urisEqual(t *testing.T, expectedURIs, actualURIs []string) { require.Equal(t, len(expectedURIs), len(actualURIs)) diff --git a/modules/ingester/instance_search.go b/modules/ingester/instance_search.go index 98349bb0356..040d19afcb3 100644 --- a/modules/ingester/instance_search.go +++ b/modules/ingester/instance_search.go @@ -7,7 +7,6 @@ import ( "sync" "github.com/gogo/protobuf/proto" - "github.com/google/uuid" "github.com/segmentio/fasthash/fnv1a" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -43,22 +42,31 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem var ( resultsMtx = sync.Mutex{} - combiner = traceql.NewMetadataCombiner() + combiner = traceql.NewMetadataCombiner(maxResults) metrics = &tempopb.SearchMetrics{} opts = common.DefaultSearchOptions() anyErr atomic.Error ) - search := func(blockID uuid.UUID, block common.Searcher, spanName string) { + search := func(blockMeta *backend.BlockMeta, block common.Searcher, spanName string) { ctx, span := tracer.Start(ctx, "instance.searchBlock."+spanName) defer span.End() span.AddEvent("block entry mtx acquired") - span.SetAttributes(attribute.String("blockID", blockID.String())) + span.SetAttributes(attribute.String("blockID", blockMeta.BlockID.String())) var resp *tempopb.SearchResponse var err error + // if blocks end time < the oldest entry in the combiner we can ignore b/c its impossible for a trace in this block + // to be more recent than our current results + // if endtime = 0 in means the block is not yet completed and we should always search it + if combiner.Count() >= maxResults && + !blockMeta.EndTime.IsZero() && + blockMeta.EndTime.Unix() < int64(combiner.OldestTimestampNanos()) { + return + } + if api.IsTraceQLQuery(req) { // note: we are creating new engine for each wal block, // and engine.ExecuteSearch is parsing the query for each block @@ -70,7 +78,7 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem } if errors.Is(err, common.ErrUnsupported) { - level.Warn(log.Logger).Log("msg", "block does not support search", "blockID", blockID) + level.Warn(log.Logger).Log("msg", "block does not support search", "blockID", blockMeta.BlockID) return } if errors.Is(err, context.Canceled) { @@ -78,7 +86,7 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem return } if err != nil { - level.Error(log.Logger).Log("msg", "error searching block", "blockID", blockID, "err", err) + level.Error(log.Logger).Log("msg", "error searching 
block", "blockID", blockMeta.BlockID, "err", err) anyErr.Store(err) return } @@ -95,17 +103,8 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem metrics.InspectedBytes += resp.Metrics.InspectedBytes } - if combiner.Count() >= maxResults { - return - } - for _, tr := range resp.Traces { combiner.AddMetadata(tr) - if combiner.Count() >= maxResults { - // Cancel all other tasks - cancel() - return - } } } @@ -119,7 +118,7 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem i.headBlockMtx.RLock() span.AddEvent("acquired headblock mtx") if includeBlock(i.headBlock.BlockMeta(), req) { - search((uuid.UUID)(i.headBlock.BlockMeta().BlockID), i.headBlock, "headBlock") + search(i.headBlock.BlockMeta(), i.headBlock, "headBlock") } i.headBlockMtx.RUnlock() if err := anyErr.Load(); err != nil { @@ -150,7 +149,7 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem wg.Add(1) go func(b common.WALBlock) { defer wg.Done() - search((uuid.UUID)(b.BlockMeta().BlockID), b, "completingBlock") + search(b.BlockMeta(), b, "completingBlock") }(b) } @@ -161,7 +160,7 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem wg.Add(1) go func(b *LocalBlock) { defer wg.Done() - search((uuid.UUID)(b.BlockMeta().BlockID), b, "completeBlock") + search(b.BlockMeta(), b, "completeBlock") }(b) } diff --git a/pkg/api/dedicated_columns_to_json.go b/pkg/api/dedicated_columns_to_json.go index 457ab161566..13d9b4ac257 100644 --- a/pkg/api/dedicated_columns_to_json.go +++ b/pkg/api/dedicated_columns_to_json.go @@ -23,6 +23,7 @@ func (d *DedicatedColumnsToJSON) JSONForDedicatedColumns(cols backend.DedicatedC } hash := cols.Hash() + if jsonString, ok := d.columnsToJSON[hash]; ok { return jsonString, nil } diff --git a/pkg/traceql/combine.go b/pkg/traceql/combine.go index 6976d0e7868..73611280c99 100644 --- a/pkg/traceql/combine.go +++ b/pkg/traceql/combine.go @@ -1,31 +1,83 @@ package traceql import ( - "sort" + "slices" "strings" + "time" "github.com/grafana/tempo/pkg/tempopb" + "github.com/grafana/tempo/pkg/util" ) type MetadataCombiner struct { - trs map[string]*tempopb.TraceSearchMetadata + trs map[string]*tempopb.TraceSearchMetadata + trsSorted []*tempopb.TraceSearchMetadata + keepMostRecent int } -func NewMetadataCombiner() *MetadataCombiner { +func NewMetadataCombiner(keepMostRecent int) *MetadataCombiner { return &MetadataCombiner{ - trs: make(map[string]*tempopb.TraceSearchMetadata), + trs: make(map[string]*tempopb.TraceSearchMetadata, keepMostRecent), + trsSorted: make([]*tempopb.TraceSearchMetadata, 0, keepMostRecent), + keepMostRecent: keepMostRecent, } } +// AddSpanset adds a new spanset to the combiner. 
It only performs the asTraceSearchMetadata +// conversion if the spanset will be added +func (c *MetadataCombiner) AddSpanset(new *Spanset) { + // if we're not configured to keep most recent then just add it + if c.keepMostRecent == 0 || c.Count() < c.keepMostRecent { + c.AddMetadata(asTraceSearchMetadata(new)) + return + } + + // else let's see if it's worth converting this to a metadata and adding it + // if it's already in the list, then we should add it + if _, ok := c.trs[util.TraceIDToHexString(new.TraceID)]; ok { + c.AddMetadata(asTraceSearchMetadata(new)) + return + } + + // if it's within range + if c.OldestTimestampNanos() <= new.StartTimeUnixNanos { + c.AddMetadata(asTraceSearchMetadata(new)) + return + } + + // this spanset is too old to bother converting and adding it +} + // AddMetadata adds the new metadata to the map. if it already exists // use CombineSearchResults to combine the two -func (c *MetadataCombiner) AddMetadata(new *tempopb.TraceSearchMetadata) { +func (c *MetadataCombiner) AddMetadata(new *tempopb.TraceSearchMetadata) bool { if existing, ok := c.trs[new.TraceID]; ok { combineSearchResults(existing, new) - return + return true + } + + if c.Count() == c.keepMostRecent && c.keepMostRecent > 0 { + // if this is older than the oldest element, bail + if c.OldestTimestampNanos() > new.StartTimeUnixNano { + return false + } + + // otherwise remove the oldest element and we'll add the new one below + oldest := c.trsSorted[c.Count()-1] + delete(c.trs, oldest.TraceID) + c.trsSorted = c.trsSorted[:len(c.trsSorted)-1] } + // insert new in the right spot c.trs[new.TraceID] = new + idx, _ := slices.BinarySearchFunc(c.trsSorted, new, func(a, b *tempopb.TraceSearchMetadata) int { + if a.StartTimeUnixNano > b.StartTimeUnixNano { + return -1 + } + return 1 + }) + c.trsSorted = slices.Insert(c.trsSorted, idx, new) + return true } func (c *MetadataCombiner) Count() int { @@ -38,14 +90,29 @@ func (c *MetadataCombiner) Exists(id string) bool { } func (c *MetadataCombiner) Metadata() []*tempopb.TraceSearchMetadata { - m := make([]*tempopb.TraceSearchMetadata, 0, len(c.trs)) - for _, tr := range c.trs { - m = append(m, tr) + return c.trsSorted +} + +// MetadataAfter returns all traces that started after the given time +func (c *MetadataCombiner) MetadataAfter(afterSeconds uint32) []*tempopb.TraceSearchMetadata { + afterNanos := uint64(afterSeconds) * uint64(time.Second) + afterTraces := make([]*tempopb.TraceSearchMetadata, 0, len(c.trsSorted)) + + for _, tr := range c.trsSorted { + if tr.StartTimeUnixNano > afterNanos { + afterTraces = append(afterTraces, tr) + } } - sort.Slice(m, func(i, j int) bool { - return m[i].StartTimeUnixNano > m[j].StartTimeUnixNano - }) - return m + + return afterTraces +} + +func (c *MetadataCombiner) OldestTimestampNanos() uint64 { + if len(c.trsSorted) == 0 { + return 0 + } + + return c.trsSorted[len(c.trsSorted)-1].StartTimeUnixNano } // combineSearchResults overlays the incoming search result with the existing result. 
This is required diff --git a/pkg/traceql/combine_test.go b/pkg/traceql/combine_test.go index a5503786024..43d950ba437 100644 --- a/pkg/traceql/combine_test.go +++ b/pkg/traceql/combine_test.go @@ -2,6 +2,7 @@ package traceql import ( "fmt" + "math/rand/v2" "slices" "strings" "testing" @@ -9,6 +10,7 @@ import ( "github.com/grafana/tempo/pkg/tempopb" v1 "github.com/grafana/tempo/pkg/tempopb/common/v1" + "github.com/grafana/tempo/pkg/util" "github.com/stretchr/testify/require" ) @@ -268,6 +270,55 @@ func TestCombineResults(t *testing.T) { } } +func TestCombinerKeepsMostRecent(t *testing.T) { + totalTraces := 10 + keepMostRecent := 5 + combiner := NewMetadataCombiner(keepMostRecent) + + // make traces + traces := make([]*Spanset, totalTraces) + for i := 0; i < totalTraces; i++ { + traceID, err := util.HexStringToTraceID(fmt.Sprintf("%d", i)) + require.NoError(t, err) + + traces[i] = &Spanset{ + TraceID: traceID, + StartTimeUnixNanos: uint64(i) * uint64(time.Second), + } + } + + // save off the most recent and reverse b/c the combiner returns most recent first + expected := make([]*tempopb.TraceSearchMetadata, 0, keepMostRecent) + for i := totalTraces - keepMostRecent; i < totalTraces; i++ { + expected = append(expected, asTraceSearchMetadata(traces[i])) + } + slices.Reverse(expected) + + rand.Shuffle(totalTraces, func(i, j int) { + traces[i], traces[j] = traces[j], traces[i] + }) + + // add to combiner + for i := 0; i < totalTraces; i++ { + combiner.AddSpanset(traces[i]) + } + + // test that the most recent are kept + actual := combiner.Metadata() + require.Equal(t, expected, actual) + require.Equal(t, keepMostRecent, combiner.Count()) + require.Equal(t, expected[len(expected)-1].StartTimeUnixNano, combiner.OldestTimestampNanos()) + for _, tr := range expected { + require.True(t, combiner.Exists(tr.TraceID)) + } + + // test MetadataAfter. 10 traces are added with start times 0-9. 
We want to get all traces that started after 7 + afterSeconds := uint32(7) + expectedTracesCount := totalTraces - int(afterSeconds+1) + actualTraces := combiner.MetadataAfter(afterSeconds) + require.Equal(t, expectedTracesCount, len(actualTraces)) +} + // nolint:govet func TestQueryRangeCombinerDiffs(t *testing.T) { start := uint64(100 * time.Millisecond) diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go index e3288d7e64b..f2657bc25de 100644 --- a/pkg/traceql/engine.go +++ b/pkg/traceql/engine.go @@ -111,7 +111,7 @@ func (e *Engine) ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq Traces: nil, Metrics: &tempopb.SearchMetrics{}, } - combiner := NewMetadataCombiner() + combiner := NewMetadataCombiner(int(searchReq.Limit)) for { spanset, err := iterator.Next(ctx) if err != nil && !errors.Is(err, io.EOF) { @@ -121,11 +121,8 @@ func (e *Engine) ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq if spanset == nil { break } - combiner.AddMetadata(e.asTraceSearchMetadata(spanset)) - if combiner.Count() >= int(searchReq.Limit) && searchReq.Limit > 0 { - break - } + combiner.AddSpanset(spanset) } res.Traces = combiner.Metadata() @@ -242,7 +239,7 @@ func (e *Engine) createAutocompleteRequest(tag Attribute, pipeline Pipeline) Fet return autocompleteReq } -func (e *Engine) asTraceSearchMetadata(spanset *Spanset) *tempopb.TraceSearchMetadata { +func asTraceSearchMetadata(spanset *Spanset) *tempopb.TraceSearchMetadata { metadata := &tempopb.TraceSearchMetadata{ TraceID: util.TraceIDToHexString(spanset.TraceID), RootServiceName: spanset.RootServiceName, diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go index f7f0599b4e4..467e87fb292 100644 --- a/pkg/traceql/engine_test.go +++ b/pkg/traceql/engine_test.go @@ -253,9 +253,7 @@ func TestEngine_asTraceSearchMetadata(t *testing.T) { }, } - e := NewEngine() - - traceSearchMetadata := e.asTraceSearchMetadata(spanSet) + traceSearchMetadata := asTraceSearchMetadata(spanSet) expectedSpanset := &tempopb.SpanSet{ Matched: 2, diff --git a/tempodb/blocklist/poller.go b/tempodb/blocklist/poller.go index e2fc1ed2751..8e611af869e 100644 --- a/tempodb/blocklist/poller.go +++ b/tempodb/blocklist/poller.go @@ -6,7 +6,6 @@ import ( "fmt" "math/rand" "path" - "sort" "strconv" "sync" "time" @@ -372,14 +371,6 @@ func (p *Poller) pollTenantBlocks( newBlockList = append(newBlockList, newM...) newCompactedBlocklist = append(newCompactedBlocklist, newCm...) 
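
The sort removals just below are the flip side of the ordered-results work in this patch: the poller no longer pre-sorts each tenant's blocklist by StartTime, because ordering is now established where it matters, in the search sharder, which walks block metas most-recent-first (see TestBackendShards above, which sorts by descending EndTime before building shards). A minimal runnable sketch of that newest-first contract, using a stand-in struct for backend.BlockMeta:

    package main

    import (
        "fmt"
        "sort"
        "time"
    )

    // blockMeta stands in for backend.BlockMeta; only the EndTime field,
    // which the ordering uses, is reproduced here.
    type blockMeta struct{ EndTime time.Time }

    func main() {
        metas := []blockMeta{
            {EndTime: time.Unix(10, 0)},
            {EndTime: time.Unix(40, 0)},
            {EndTime: time.Unix(20, 0)},
        }
        // most recent block first, mirroring the sort in the sharder test
        sort.Slice(metas, func(i, j int) bool {
            return metas[i].EndTime.After(metas[j].EndTime)
        })
        for _, m := range metas {
            fmt.Println(m.EndTime.Unix()) // prints 40, 20, 10
        }
    }
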
- sort.Slice(newBlockList, func(i, j int) bool { - return newBlockList[i].StartTime.Before(newBlockList[j].StartTime) - }) - - sort.Slice(newCompactedBlocklist, func(i, j int) bool { - return newCompactedBlocklist[i].StartTime.Before(newCompactedBlocklist[j].StartTime) - }) - return newBlockList, newCompactedBlocklist, nil } diff --git a/tempodb/tempodb_test.go b/tempodb/tempodb_test.go index d47374f9047..a11bf3c694e 100644 --- a/tempodb/tempodb_test.go +++ b/tempodb/tempodb_test.go @@ -240,24 +240,11 @@ func checkBlocklists(t *testing.T, expectedID uuid.UUID, expectedB int, expected assert.Equal(t, expectedID, (uuid.UUID)(blocklist[0].BlockID)) } - // confirm blocklists are in starttime ascending order - lastTime := time.Time{} - for _, b := range blocklist { - assert.True(t, lastTime.Before(b.StartTime) || lastTime.Equal(b.StartTime)) - lastTime = b.StartTime - } - compactedBlocklist := rw.blocklist.CompactedMetas(testTenantID) assert.Len(t, compactedBlocklist, expectedCB) if expectedCB > 0 && expectedID != uuid.Nil { assert.Equal(t, expectedID, (uuid.UUID)(compactedBlocklist[0].BlockID)) } - - lastTime = time.Time{} - for _, b := range compactedBlocklist { - assert.True(t, lastTime.Before(b.StartTime) || lastTime.Equal(b.StartTime)) - lastTime = b.StartTime - } } func TestIncludeBlock(t *testing.T) { From 5c4b2dd4ccfcd20172318eaf2c32424208ec7269 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Thu, 24 Oct 2024 13:42:14 -0400 Subject: [PATCH 2/9] add most_recent query hint Signed-off-by: Joe Elliott --- modules/frontend/combiner/search.go | 56 +-- modules/frontend/combiner/search_test.go | 484 +++++++++++--------- modules/frontend/pipeline/responses_test.go | 8 +- modules/frontend/search_handlers.go | 34 +- modules/frontend/search_handlers_test.go | 23 +- modules/frontend/tag_handlers_test.go | 23 +- modules/ingester/ingester_test.go | 8 +- modules/ingester/instance_search.go | 30 +- modules/ingester/instance_search_test.go | 25 +- pkg/traceql/combine.go | 140 +++++- pkg/traceql/combine_test.go | 4 +- pkg/traceql/engine.go | 12 +- pkg/traceql/enum_hints.go | 3 +- 13 files changed, 518 insertions(+), 332 deletions(-) diff --git a/modules/frontend/combiner/search.go b/modules/frontend/combiner/search.go index e1245f78d0f..6f164714590 100644 --- a/modules/frontend/combiner/search.go +++ b/modules/frontend/combiner/search.go @@ -2,8 +2,6 @@ package combiner import ( "net/http" - "sort" - "time" "github.com/grafana/tempo/pkg/api" "github.com/grafana/tempo/pkg/search" @@ -40,8 +38,8 @@ func (s *SearchJobResponse) IsMetadata() bool { var _ GRPCCombiner[*tempopb.SearchResponse] = (*genericCombiner[*tempopb.SearchResponse])(nil) // NewSearch returns a search combiner -func NewSearch(keepMostRecent int) Combiner { - metadataCombiner := traceql.NewMetadataCombiner(keepMostRecent) +func NewSearch(limit int, keepMostRecent bool) Combiner { + metadataCombiner := traceql.NewMetadataCombiner(limit, keepMostRecent) diffTraces := map[string]struct{}{} completedThroughTracker := &ShardCompletionTracker{} @@ -95,18 +93,25 @@ func NewSearch(keepMostRecent int) Combiner { Metrics: current.Metrics, } - completedThroughSeconds := completedThroughTracker.completedThroughSeconds - // if all jobs are completed then let's just return everything the combiner has - if current.Metrics.CompletedJobs == current.Metrics.TotalJobs && current.Metrics.TotalJobs > 0 { - completedThroughSeconds = 1 - } - - // if we've not completed any shards, then return nothing - if completedThroughSeconds == 0 { - return diff, nil + 
metadataFn := metadataCombiner.Metadata + if keepMostRecent { + metadataFn = func() []*tempopb.TraceSearchMetadata { + completedThroughSeconds := completedThroughTracker.completedThroughSeconds + // if all jobs are completed then let's just return everything the combiner has + if current.Metrics.CompletedJobs == current.Metrics.TotalJobs && current.Metrics.TotalJobs > 0 { + completedThroughSeconds = 1 + } + + // if we've not completed any shards, then return nothing + if completedThroughSeconds == 0 { + return nil + } + + return metadataCombiner.MetadataAfter(completedThroughSeconds) + } } - for _, tr := range metadataCombiner.MetadataAfter(completedThroughSeconds) { + for _, tr := range metadataFn() { // if not in the map, skip. we haven't seen an update if _, ok := diffTraces[tr.TraceID]; !ok { continue @@ -116,10 +121,6 @@ func NewSearch(keepMostRecent int) Combiner { diff.Traces = append(diff.Traces, tr) } - sort.Slice(diff.Traces, func(i, j int) bool { - return diff.Traces[i].StartTimeUnixNano > diff.Traces[j].StartTimeUnixNano - }) - addRootSpanNotReceivedText(diff.Traces) return diff, nil @@ -127,24 +128,13 @@ func NewSearch(keepMostRecent int) Combiner { // search combiner doesn't use current in the way i would have expected. it only tracks metrics through current and uses the results map for the actual traces. // should we change this? quit: func(_ *tempopb.SearchResponse) bool { - // are we tracking a limit at all? - if keepMostRecent <= 0 { - return false - } - completedThroughSeconds := completedThroughTracker.completedThroughSeconds // have we completed any shards? if completedThroughSeconds == 0 { - return false - } - - // do we have enought? - if metadataCombiner.Count() < keepMostRecent { - return false + completedThroughSeconds = traceql.TimestampNever } - // is our oldest trace newer than the completedThrough? 
- return metadataCombiner.OldestTimestampNanos() > uint64(completedThroughSeconds)*uint64(time.Second) + return metadataCombiner.IsCompleteFor(completedThroughSeconds) }, } initHTTPCombiner(c, api.HeaderAcceptJSON) @@ -159,8 +149,8 @@ func addRootSpanNotReceivedText(results []*tempopb.TraceSearchMetadata) { } } -func NewTypedSearch(limit int) GRPCCombiner[*tempopb.SearchResponse] { - return NewSearch(limit).(GRPCCombiner[*tempopb.SearchResponse]) +func NewTypedSearch(limit int, keepMostRecent bool) GRPCCombiner[*tempopb.SearchResponse] { + return NewSearch(limit, keepMostRecent).(GRPCCombiner[*tempopb.SearchResponse]) } // ShardCompletionTracker diff --git a/modules/frontend/combiner/search_test.go b/modules/frontend/combiner/search_test.go index bf475b3475a..778666bc7c1 100644 --- a/modules/frontend/combiner/search_test.go +++ b/modules/frontend/combiner/search_test.go @@ -16,35 +16,92 @@ import ( "google.golang.org/grpc/codes" ) -func TestSearchProgressShouldQuit(t *testing.T) { +func TestSearchProgressShouldQuitAny(t *testing.T) { // new combiner should not quit - c := NewSearch(0) + c := NewSearch(0, false) should := c.ShouldQuit() require.False(t, should) // 500 response should quit - c = NewSearch(0) + c = NewSearch(0, false) err := c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{}, 500)) require.NoError(t, err) should = c.ShouldQuit() require.True(t, should) // 429 response should quit - c = NewSearch(0) + c = NewSearch(0, false) err = c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{}, 429)) require.NoError(t, err) should = c.ShouldQuit() require.True(t, should) // unparseable body should not quit, but should return an error - c = NewSearch(0) + c = NewSearch(0, false) err = c.AddResponse(&testPipelineResponse{r: &http.Response{Body: io.NopCloser(strings.NewReader("foo")), StatusCode: 200}}) require.Error(t, err) should = c.ShouldQuit() require.False(t, should) // under limit should not quit - c = NewSearch(2) + c = NewSearch(2, false) + err = c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "1", + }, + }, + }, 200)) + require.NoError(t, err) + should = c.ShouldQuit() + require.False(t, should) + + // over limit should quit + c = NewSearch(1, false) + err = c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "1", + }, + { + TraceID: "2", + }, + }, + }, 200)) + require.NoError(t, err) + should = c.ShouldQuit() + require.True(t, should) +} + +func TestSearchProgressShouldQuitMostRecent(t *testing.T) { + // new combiner should not quit + c := NewSearch(0, true) + should := c.ShouldQuit() + require.False(t, should) + + // 500 response should quit + c = NewSearch(0, true) + err := c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{}, 500)) + require.NoError(t, err) + should = c.ShouldQuit() + require.True(t, should) + + // 429 response should quit + c = NewSearch(0, true) + err = c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{}, 429)) + require.NoError(t, err) + should = c.ShouldQuit() + require.True(t, should) + + // unparseable body should not quit, but should return an error + c = NewSearch(0, true) + err = c.AddResponse(&testPipelineResponse{r: &http.Response{Body: io.NopCloser(strings.NewReader("foo")), StatusCode: 200}}) + require.Error(t, err) + should = c.ShouldQuit() + require.False(t, should) + + // under limit should not quit + c = NewSearch(2, true) err = c.AddResponse(toHTTPResponse(t, &tempopb.SearchResponse{ Traces: 
[]*tempopb.TraceSearchMetadata{ { @@ -57,7 +114,7 @@ func TestSearchProgressShouldQuit(t *testing.T) { require.False(t, should) // over limit but no search job response, should not quit - c = NewSearch(1) + c = NewSearch(1, true) err = c.AddResponse(toHTTPResponseWithResponseData(t, &tempopb.SearchResponse{ Traces: []*tempopb.TraceSearchMetadata{ { @@ -111,238 +168,242 @@ func TestSearchProgressShouldQuit(t *testing.T) { } func TestSearchCombinesResults(t *testing.T) { - start := time.Date(1, 2, 3, 4, 5, 6, 7, time.UTC) - traceID := "traceID" + for _, keepMostRecent := range []bool{true, false} { + start := time.Date(1, 2, 3, 4, 5, 6, 7, time.UTC) + traceID := "traceID" - c := NewSearch(10) - sr := toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID, - StartTimeUnixNano: uint64(start.Add(time.Second).UnixNano()), - DurationMs: uint32(time.Second.Milliseconds()), - }, // 1 second after start and shorter duration - { - TraceID: traceID, - StartTimeUnixNano: uint64(start.UnixNano()), - DurationMs: uint32(time.Hour.Milliseconds()), - }, // earliest start time and longer duration - { - TraceID: traceID, - StartTimeUnixNano: uint64(start.Add(time.Hour).UnixNano()), - DurationMs: uint32(time.Millisecond.Milliseconds()), - }, // 1 hour after start and shorter duration - }, - Metrics: &tempopb.SearchMetrics{}, - }, 200) - err := c.AddResponse(sr) - require.NoError(t, err) + c := NewSearch(10, keepMostRecent) + sr := toHTTPResponse(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: traceID, + StartTimeUnixNano: uint64(start.Add(time.Second).UnixNano()), + DurationMs: uint32(time.Second.Milliseconds()), + }, // 1 second after start and shorter duration + { + TraceID: traceID, + StartTimeUnixNano: uint64(start.UnixNano()), + DurationMs: uint32(time.Hour.Milliseconds()), + }, // earliest start time and longer duration + { + TraceID: traceID, + StartTimeUnixNano: uint64(start.Add(time.Hour).UnixNano()), + DurationMs: uint32(time.Millisecond.Milliseconds()), + }, // 1 hour after start and shorter duration + }, + Metrics: &tempopb.SearchMetrics{}, + }, 200) + err := c.AddResponse(sr) + require.NoError(t, err) - expected := &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: traceID, - StartTimeUnixNano: uint64(start.UnixNano()), - DurationMs: uint32(time.Hour.Milliseconds()), - RootServiceName: search.RootSpanNotYetReceivedText, + expected := &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: traceID, + StartTimeUnixNano: uint64(start.UnixNano()), + DurationMs: uint32(time.Hour.Milliseconds()), + RootServiceName: search.RootSpanNotYetReceivedText, + }, }, - }, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 1, - }, - } + Metrics: &tempopb.SearchMetrics{ + CompletedJobs: 1, + }, + } - resp, err := c.HTTPFinal() - require.NoError(t, err) + resp, err := c.HTTPFinal() + require.NoError(t, err) - actual := &tempopb.SearchResponse{} - fromHTTPResponse(t, resp, actual) + actual := &tempopb.SearchResponse{} + fromHTTPResponse(t, resp, actual) - require.Equal(t, expected, actual) + require.Equal(t, expected, actual) + } } func TestSearchResponseCombiner(t *testing.T) { - tests := []struct { - name string - response1 PipelineResponse - response2 PipelineResponse - - expectedStatus int - expectedResponse *tempopb.SearchResponse - expectedHTTPError error - expectedGRPCError error - }{ - { - name: "empty returns", - response1: toHTTPResponse(t, 
&tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - response2: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - expectedStatus: 200, - expectedResponse: &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{}, - Metrics: &tempopb.SearchMetrics{ - CompletedJobs: 2, + for _, keepMostRecent := range []bool{true, false} { + tests := []struct { + name string + response1 PipelineResponse + response2 PipelineResponse + + expectedStatus int + expectedResponse *tempopb.SearchResponse + expectedHTTPError error + expectedGRPCError error + }{ + { + name: "empty returns", + response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + response2: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + expectedStatus: 200, + expectedResponse: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{}, + Metrics: &tempopb.SearchMetrics{ + CompletedJobs: 2, + }, }, }, - }, - { - name: "404+200", - response1: toHTTPResponse(t, nil, 404), - response2: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - expectedStatus: 404, - expectedGRPCError: status.Error(codes.NotFound, ""), - }, - { - name: "200+400", - response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - response2: toHTTPResponse(t, nil, 400), - expectedStatus: 400, - expectedGRPCError: status.Error(codes.InvalidArgument, ""), - }, - { - name: "200+429", - response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - response2: toHTTPResponse(t, nil, 429), - expectedStatus: 429, - expectedGRPCError: status.Error(codes.ResourceExhausted, ""), - }, - { - name: "500+404", - response1: toHTTPResponse(t, nil, 500), - response2: toHTTPResponse(t, nil, 404), - expectedStatus: 500, - expectedGRPCError: status.Error(codes.Internal, ""), - }, - { - name: "404+500 - first bad response wins", - response1: toHTTPResponse(t, nil, 404), - response2: toHTTPResponse(t, nil, 500), - expectedStatus: 404, - expectedGRPCError: status.Error(codes.NotFound, ""), - }, - { - name: "500+200", - response1: toHTTPResponse(t, nil, 500), - response2: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - expectedStatus: 500, - expectedGRPCError: status.Error(codes.Internal, ""), - }, - { - name: "200+500", - response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), - response2: toHTTPResponse(t, nil, 500), - expectedStatus: 500, - expectedGRPCError: status.Error(codes.Internal, ""), - }, - { - name: "respects total blocks message", - response1: &SearchJobResponse{ - TotalBlocks: 5, - TotalJobs: 10, - TotalBytes: 15, + { + name: "404+200", + response1: toHTTPResponse(t, nil, 404), + response2: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + expectedStatus: 404, + expectedGRPCError: status.Error(codes.NotFound, ""), }, - response2: toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "5678", - StartTimeUnixNano: 0, + { + name: "200+400", + response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + response2: toHTTPResponse(t, nil, 400), + expectedStatus: 400, + expectedGRPCError: status.Error(codes.InvalidArgument, ""), + }, + { + name: "200+429", + response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + 
response2: toHTTPResponse(t, nil, 429), + expectedStatus: 429, + expectedGRPCError: status.Error(codes.ResourceExhausted, ""), + }, + { + name: "500+404", + response1: toHTTPResponse(t, nil, 500), + response2: toHTTPResponse(t, nil, 404), + expectedStatus: 500, + expectedGRPCError: status.Error(codes.Internal, ""), + }, + { + name: "404+500 - first bad response wins", + response1: toHTTPResponse(t, nil, 404), + response2: toHTTPResponse(t, nil, 500), + expectedStatus: 404, + expectedGRPCError: status.Error(codes.NotFound, ""), + }, + { + name: "500+200", + response1: toHTTPResponse(t, nil, 500), + response2: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + expectedStatus: 500, + expectedGRPCError: status.Error(codes.Internal, ""), + }, + { + name: "200+500", + response1: toHTTPResponse(t, &tempopb.SearchResponse{Metrics: &tempopb.SearchMetrics{}}, 200), + response2: toHTTPResponse(t, nil, 500), + expectedStatus: 500, + expectedGRPCError: status.Error(codes.Internal, ""), + }, + { + name: "respects total blocks message", + response1: &SearchJobResponse{ + TotalBlocks: 5, + TotalJobs: 10, + TotalBytes: 15, + }, + response2: toHTTPResponse(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "5678", + StartTimeUnixNano: 0, + }, }, - }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 5, - InspectedBytes: 7, - }, - }, 200), - expectedStatus: 200, - expectedResponse: &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "5678", - StartTimeUnixNano: 0, - RootServiceName: search.RootSpanNotYetReceivedText, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 5, + InspectedBytes: 7, + }, + }, 200), + expectedStatus: 200, + expectedResponse: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "5678", + StartTimeUnixNano: 0, + RootServiceName: search.RootSpanNotYetReceivedText, + }, + }, + Metrics: &tempopb.SearchMetrics{ + TotalBlocks: 5, + TotalJobs: 10, + TotalBlockBytes: 15, + InspectedTraces: 5, + InspectedBytes: 7, + CompletedJobs: 1, }, - }, - Metrics: &tempopb.SearchMetrics{ - TotalBlocks: 5, - TotalJobs: 10, - TotalBlockBytes: 15, - InspectedTraces: 5, - InspectedBytes: 7, - CompletedJobs: 1, }, }, - }, - { - name: "200+200", - response1: toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "1234", - StartTimeUnixNano: 1, + { + name: "200+200", + response1: toHTTPResponse(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "1234", + StartTimeUnixNano: 1, + }, }, - }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 1, - TotalBlocks: 2, - InspectedBytes: 3, - }, - }, 200), - response2: toHTTPResponse(t, &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "5678", - StartTimeUnixNano: 0, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 1, + TotalBlocks: 2, + InspectedBytes: 3, }, - }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 5, - TotalBlocks: 6, - InspectedBytes: 7, - }, - }, 200), - expectedStatus: 200, - expectedResponse: &tempopb.SearchResponse{ - Traces: []*tempopb.TraceSearchMetadata{ - { - TraceID: "1234", - StartTimeUnixNano: 1, - RootServiceName: search.RootSpanNotYetReceivedText, + }, 200), + response2: toHTTPResponse(t, &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "5678", + StartTimeUnixNano: 0, + }, }, - { - TraceID: "5678", - StartTimeUnixNano: 0, - RootServiceName: 
search.RootSpanNotYetReceivedText, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 5, + TotalBlocks: 6, + InspectedBytes: 7, + }, + }, 200), + expectedStatus: 200, + expectedResponse: &tempopb.SearchResponse{ + Traces: []*tempopb.TraceSearchMetadata{ + { + TraceID: "1234", + StartTimeUnixNano: 1, + RootServiceName: search.RootSpanNotYetReceivedText, + }, + { + TraceID: "5678", + StartTimeUnixNano: 0, + RootServiceName: search.RootSpanNotYetReceivedText, + }, + }, + Metrics: &tempopb.SearchMetrics{ + InspectedTraces: 6, + InspectedBytes: 10, + CompletedJobs: 2, }, - }, - Metrics: &tempopb.SearchMetrics{ - InspectedTraces: 6, - InspectedBytes: 10, - CompletedJobs: 2, }, }, - }, - } + } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - combiner := NewTypedSearch(20) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + combiner := NewTypedSearch(20, keepMostRecent) - err := combiner.AddResponse(tc.response1) - require.NoError(t, err) - err = combiner.AddResponse(tc.response2) - require.NoError(t, err) + err := combiner.AddResponse(tc.response1) + require.NoError(t, err) + err = combiner.AddResponse(tc.response2) + require.NoError(t, err) - httpResp, err := combiner.HTTPFinal() - require.Equal(t, tc.expectedStatus, httpResp.StatusCode) - require.Equal(t, tc.expectedHTTPError, err) + httpResp, err := combiner.HTTPFinal() + require.Equal(t, tc.expectedStatus, httpResp.StatusCode) + require.Equal(t, tc.expectedHTTPError, err) - grpcresp, err := combiner.GRPCFinal() - require.Equal(t, tc.expectedGRPCError, err) - require.Equal(t, tc.expectedResponse, grpcresp) - }) + grpcresp, err := combiner.GRPCFinal() + require.Equal(t, tc.expectedGRPCError, err) + require.Equal(t, tc.expectedResponse, grpcresp) + }) + } } } -func TestCombinerDiffs(t *testing.T) { +func TestCombinerShards(t *testing.T) { tests := []struct { name string pipelineResponse PipelineResponse @@ -583,7 +644,8 @@ func TestCombinerDiffs(t *testing.T) { } // apply tests one at a time to the combiner and check expected results - combiner := NewTypedSearch(5) + + combiner := NewTypedSearch(5, true) for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { if tc.pipelineResponse != nil { diff --git a/modules/frontend/pipeline/responses_test.go b/modules/frontend/pipeline/responses_test.go index 966f5c1f56f..04f1ffbcaeb 100644 --- a/modules/frontend/pipeline/responses_test.go +++ b/modules/frontend/pipeline/responses_test.go @@ -304,7 +304,7 @@ func TestAsyncResponsesDoesNotLeak(t *testing.T) { bridge := &pipelineBridge{ next: tc.finalRT(cancel), } - httpCollector := NewHTTPCollector(sharder{next: bridge}, 0, combiner.NewSearch(0)) + httpCollector := NewHTTPCollector(sharder{next: bridge}, 0, combiner.NewSearch(0, false)) _, _ = httpCollector.RoundTrip(req) @@ -326,7 +326,7 @@ func TestAsyncResponsesDoesNotLeak(t *testing.T) { bridge := &pipelineBridge{ next: tc.finalRT(cancel), } - grpcCollector := NewGRPCCollector[*tempopb.SearchResponse](sharder{next: bridge}, 0, combiner.NewTypedSearch(0), func(_ *tempopb.SearchResponse) error { return nil }) + grpcCollector := NewGRPCCollector[*tempopb.SearchResponse](sharder{next: bridge}, 0, combiner.NewTypedSearch(0, false), func(_ *tempopb.SearchResponse) error { return nil }) _ = grpcCollector.RoundTrip(req) @@ -350,7 +350,7 @@ func TestAsyncResponsesDoesNotLeak(t *testing.T) { } s := sharder{next: sharder{next: bridge}, funcSharder: true} - grpcCollector := NewGRPCCollector[*tempopb.SearchResponse](s, 0, combiner.NewTypedSearch(0), func(_ 
*tempopb.SearchResponse) error { return nil }) + grpcCollector := NewGRPCCollector[*tempopb.SearchResponse](s, 0, combiner.NewTypedSearch(0, false), func(_ *tempopb.SearchResponse) error { return nil }) _ = grpcCollector.RoundTrip(req) @@ -373,7 +373,7 @@ func TestAsyncResponsesDoesNotLeak(t *testing.T) { } s := sharder{next: sharder{next: bridge, funcSharder: true}} - grpcCollector := NewGRPCCollector[*tempopb.SearchResponse](s, 0, combiner.NewTypedSearch(0), func(_ *tempopb.SearchResponse) error { return nil }) + grpcCollector := NewGRPCCollector[*tempopb.SearchResponse](s, 0, combiner.NewTypedSearch(0, false), func(_ *tempopb.SearchResponse) error { return nil }) _ = grpcCollector.RoundTrip(req) diff --git a/modules/frontend/search_handlers.go b/modules/frontend/search_handlers.go index 35875832016..0f819d514f3 100644 --- a/modules/frontend/search_handlers.go +++ b/modules/frontend/search_handlers.go @@ -20,6 +20,7 @@ import ( "github.com/grafana/tempo/pkg/api" "github.com/grafana/tempo/pkg/tempopb" + "github.com/grafana/tempo/pkg/traceql" ) // newSearchStreamingGRPCHandler returns a handler that streams results from the HTTP handler @@ -43,14 +44,14 @@ func newSearchStreamingGRPCHandler(cfg Config, next pipeline.AsyncRoundTripper[c tenant, _ := user.ExtractOrgID(ctx) start := time.Now() - limit, err := adjustLimit(req.Limit, cfg.Search.Sharder.DefaultLimit, cfg.Search.Sharder.MaxLimit) + comb, err := newCombiner(req, cfg.Search.Sharder) if err != nil { - level.Error(logger).Log("msg", "search streaming: adjust limit failed", "err", err) - return status.Errorf(codes.InvalidArgument, "adjust limit: %s", err.Error()) + level.Error(logger).Log("msg", "search streaming: could not create combiner", "err", err) + return status.Error(codes.InvalidArgument, err.Error()) + } var finalResponse *tempopb.SearchResponse - comb := combiner.NewTypedSearch(int(limit)) collector := pipeline.NewGRPCCollector[*tempopb.SearchResponse](next, cfg.ResponseConsumers, comb, func(sr *tempopb.SearchResponse) error { finalResponse = sr // sadly we can't srv.Send directly into the collector. 
we need bytesProcessed for the SLO calculations return srv.Send(sr) @@ -89,10 +90,9 @@ func newSearchHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.P }, nil } - // build combiner with limit - limit, err := adjustLimit(searchReq.Limit, cfg.Search.Sharder.DefaultLimit, cfg.Search.Sharder.MaxLimit) + comb, err := newCombiner(searchReq, cfg.Search.Sharder) if err != nil { - level.Error(logger).Log("msg", "search: adjust limit failed", "err", err) + level.Error(logger).Log("msg", "search: could not create combiner", "err", err) return &http.Response{ StatusCode: http.StatusBadRequest, Status: http.StatusText(http.StatusBadRequest), @@ -103,7 +103,6 @@ func newSearchHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.P logRequest(logger, tenant, searchReq) // build and use roundtripper - comb := combiner.NewTypedSearch(int(limit)) rt := pipeline.NewHTTPCollector(next, cfg.ResponseConsumers, comb) resp, err := rt.RoundTrip(req) @@ -122,6 +121,25 @@ func newSearchHTTPHandler(cfg Config, next pipeline.AsyncRoundTripper[combiner.P }) } +func newCombiner(req *tempopb.SearchRequest, cfg SearchSharderConfig) (combiner.GRPCCombiner[*tempopb.SearchResponse], error) { + limit, err := adjustLimit(req.Limit, cfg.DefaultLimit, cfg.MaxLimit) + if err != nil { + return nil, err + } + + query, err := traceql.Parse(req.Query) + if err != nil { + return nil, fmt.Errorf("invalid TraceQL query: %s", err) + } + + var mostRecent, ok bool + if mostRecent, ok = query.Hints.GetBool(traceql.HintMostRecent, false); !ok { + mostRecent = false + } + + return combiner.NewTypedSearch(int(limit), mostRecent), nil +} + // adjusts the limit based on provided config func adjustLimit(limit, defaultLimit, maxLimit uint32) (uint32, error) { if limit == 0 { diff --git a/modules/frontend/search_handlers_test.go b/modules/frontend/search_handlers_test.go index 43df9cfe2af..81e0e0c82ce 100644 --- a/modules/frontend/search_handlers_test.go +++ b/modules/frontend/search_handlers_test.go @@ -32,7 +32,6 @@ import ( "github.com/grafana/tempo/pkg/cache" "github.com/grafana/tempo/pkg/search" "github.com/grafana/tempo/pkg/tempopb" - "github.com/grafana/tempo/pkg/traceql" "github.com/grafana/tempo/pkg/util" "github.com/grafana/tempo/pkg/util/test" "github.com/grafana/tempo/tempodb" @@ -140,14 +139,14 @@ func TestFrontendSearch(t *testing.T) { func runnerBadRequestOnOrgID(t *testing.T, f *QueryFrontend) { // http - httpReq := httptest.NewRequest("GET", "/api/search", nil) + httpReq := httptest.NewRequest("GET", "/api/search?q={}", nil) httpResp := httptest.NewRecorder() f.SearchHandler.ServeHTTP(httpResp, httpReq) require.Equal(t, "no org id", httpResp.Body.String()) require.Equal(t, http.StatusBadRequest, httpResp.Code) // grpc - grpcReq := &tempopb.SearchRequest{} + grpcReq := &tempopb.SearchRequest{Query: "{}"} err := f.streamingSearch(grpcReq, newMockStreamingServer[*tempopb.SearchResponse]("", nil)) require.Equal(t, status.Error(codes.InvalidArgument, "no org id"), err) } @@ -273,7 +272,7 @@ func runnerRequests(t *testing.T, f *QueryFrontend) { func runnerClientCancelContext(t *testing.T, f *QueryFrontend) { // http - httpReq := httptest.NewRequest("GET", "/api/search", nil) + httpReq := httptest.NewRequest("GET", "/api/search?q={}", nil) httpResp := httptest.NewRecorder() ctx, cancel := context.WithCancel(httpReq.Context()) @@ -296,7 +295,7 @@ func runnerClientCancelContext(t *testing.T, f *QueryFrontend) { time.Sleep(50 * time.Millisecond) cancel() }() - grpcReq := &tempopb.SearchRequest{} + grpcReq := 
&tempopb.SearchRequest{Query: "{}"} err := f.streamingSearch(grpcReq, srv) require.Equal(t, status.Error(codes.Internal, "context canceled"), err) } @@ -385,7 +384,7 @@ func TestSearchLimitHonored(t *testing.T) { tenant := "1|2|3|4|5|6" // due to the blocks we will have 4 trace ids normally - httpReq := httptest.NewRequest("GET", "/api/search", nil) + httpReq := httptest.NewRequest("GET", "/api/search?q={}", nil) httpReq, err := api.BuildSearchRequest(httpReq, tc.request) require.NoError(t, err) @@ -406,18 +405,18 @@ func TestSearchLimitHonored(t *testing.T) { } // grpc - combiner := traceql.NewMetadataCombiner(100) + distinctTraces := map[string]struct{}{} err = f.streamingSearch(tc.request, newMockStreamingServer(tenant, func(i int, sr *tempopb.SearchResponse) { // combine for _, t := range sr.Traces { - combiner.AddMetadata(t) + distinctTraces[t.TraceID] = struct{}{} } })) if tc.badRequest { - require.Equal(t, status.Error(codes.InvalidArgument, "adjust limit: limit 20 exceeds max limit 15"), err) + require.Equal(t, status.Error(codes.InvalidArgument, "limit 20 exceeds max limit 15"), err) } else { require.NoError(t, err) - require.Equal(t, tc.expectedTraces, combiner.Count()) + require.Equal(t, tc.expectedTraces, len(distinctTraces)) } }) } @@ -503,7 +502,7 @@ func TestSearchFailurePropagatesFromQueriers(t *testing.T) { }, }, nil) - httpReq := httptest.NewRequest("GET", "/api/search?start=1&end=10000", nil) + httpReq := httptest.NewRequest("GET", "/api/search?start=1&end=10000&q={}", nil) httpResp := httptest.NewRecorder() ctx := user.InjectOrgID(httpReq.Context(), "foo") @@ -550,7 +549,7 @@ func TestSearchFailurePropagatesFromQueriers(t *testing.T) { // grpc srv := newMockStreamingServer[*tempopb.SearchResponse]("bar", nil) - grpcReq := &tempopb.SearchRequest{} + grpcReq := &tempopb.SearchRequest{Query: "{}"} err := f.streamingSearch(grpcReq, srv) require.Equal(t, tc.expectedErr, err) } diff --git a/modules/frontend/tag_handlers_test.go b/modules/frontend/tag_handlers_test.go index 23f46147f83..84b67b3e54c 100644 --- a/modules/frontend/tag_handlers_test.go +++ b/modules/frontend/tag_handlers_test.go @@ -16,6 +16,7 @@ import ( "github.com/gogo/protobuf/jsonpb" "github.com/gogo/protobuf/proto" "github.com/gogo/status" + "github.com/gorilla/mux" "github.com/grafana/dskit/user" "github.com/grafana/tempo/pkg/cache" "github.com/grafana/tempo/pkg/tempopb" @@ -49,7 +50,7 @@ func runnerTagsBadRequestOnOrgID(t *testing.T, f *QueryFrontend) { // http httpReq := httptest.NewRequest("GET", "/api/search/tags", nil) httpResp := httptest.NewRecorder() - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsHandler.ServeHTTP(httpResp, httpReq) require.Equal(t, "no org id", httpResp.Body.String()) require.Equal(t, http.StatusBadRequest, httpResp.Code) @@ -63,7 +64,7 @@ func runnerTagsV2BadRequestOnOrgID(t *testing.T, f *QueryFrontend) { // http httpReq := httptest.NewRequest("GET", "/api/v2/search/tags", nil) httpResp := httptest.NewRecorder() - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsV2Handler.ServeHTTP(httpResp, httpReq) require.Equal(t, "no org id", httpResp.Body.String()) require.Equal(t, http.StatusBadRequest, httpResp.Code) @@ -76,8 +77,9 @@ func runnerTagsV2BadRequestOnOrgID(t *testing.T, f *QueryFrontend) { func runnerTagValuesBadRequestOnOrgID(t *testing.T, f *QueryFrontend) { // http httpReq := httptest.NewRequest("GET", "/api/search/tag/foo/values", nil) + httpReq = mux.SetURLVars(httpReq, map[string]string{"tagName": "foo"}) httpResp := httptest.NewRecorder() - 
f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsValuesHandler.ServeHTTP(httpResp, httpReq) require.Equal(t, "no org id", httpResp.Body.String()) require.Equal(t, http.StatusBadRequest, httpResp.Code) @@ -90,8 +92,9 @@ func runnerTagValuesBadRequestOnOrgID(t *testing.T, f *QueryFrontend) { func runnerTagValuesV2BadRequestOnOrgID(t *testing.T, f *QueryFrontend) { // http httpReq := httptest.NewRequest("GET", "/api/v2/search/tag/foo/values", nil) + httpReq = mux.SetURLVars(httpReq, map[string]string{"tagName": "foo"}) httpResp := httptest.NewRecorder() - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsValuesV2Handler.ServeHTTP(httpResp, httpReq) require.Equal(t, "no org id", httpResp.Body.String()) require.Equal(t, http.StatusBadRequest, httpResp.Code) @@ -115,7 +118,7 @@ func runnerTagsV2ClientCancelContext(t *testing.T, f *QueryFrontend) { cancel() }() - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsV2Handler.ServeHTTP(httpResp, httpReq) require.Equal(t, "context canceled", httpResp.Body.String()) require.Equal(t, 499, httpResp.Code) // todo: is this 499 valid? @@ -134,6 +137,7 @@ func runnerTagsV2ClientCancelContext(t *testing.T, f *QueryFrontend) { func runnerTagValuesV2ClientCancelContext(t *testing.T, f *QueryFrontend) { // http httpReq := httptest.NewRequest("GET", "/api/v2/search/tag/foo/values", nil) + httpReq = mux.SetURLVars(httpReq, map[string]string{"tagName": "foo"}) httpResp := httptest.NewRecorder() ctx, cancel := context.WithCancel(httpReq.Context()) @@ -145,7 +149,7 @@ func runnerTagValuesV2ClientCancelContext(t *testing.T, f *QueryFrontend) { cancel() }() - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsValuesV2Handler.ServeHTTP(httpResp, httpReq) require.Equal(t, "context canceled", httpResp.Body.String()) require.Equal(t, 499, httpResp.Code) // todo: is this 499 valid? 
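
The tag-handler test fixes in this file all follow one pattern: route each request to the handler that actually serves the endpoint under test, and inject the gorilla/mux path variables by hand. The injection is needed because the tests call the handler directly instead of going through the router, so mux.Vars would otherwise come back empty. A small runnable sketch (the route and tag name here are illustrative):

    package main

    import (
        "fmt"
        "net/http/httptest"

        "github.com/gorilla/mux"
    )

    func main() {
        req := httptest.NewRequest("GET", "/api/v2/search/tag/foo/values", nil)
        // without SetURLVars, mux.Vars(req) is empty when a handler is
        // invoked directly rather than through a mux.Router
        req = mux.SetURLVars(req, map[string]string{"tagName": "foo"})
        fmt.Println(mux.Vars(req)["tagName"]) // prints "foo"
    }
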
@@ -248,7 +252,7 @@ func TestSearchTagsV2FailurePropagatesFromQueriers(t *testing.T) { ctx := user.InjectOrgID(httpReq.Context(), "foo") httpReq = httpReq.WithContext(ctx) - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsV2Handler.ServeHTTP(httpResp, httpReq) require.Equal(t, tc.expectedMessage, httpResp.Body.String()) require.Equal(t, tc.expectedCode, httpResp.Code) @@ -373,13 +377,14 @@ func TestSearchTagValuesV2FailurePropagatesFromQueriers(t *testing.T) { }, }, nil) - httpReq := httptest.NewRequest("GET", "/api/v2/search/tags?start=1&end=10000", nil) + httpReq := httptest.NewRequest("GET", "/api/v2/search/tag/foo/values?start=1&end=10000", nil) + httpReq = mux.SetURLVars(httpReq, map[string]string{"tagName": "foo"}) httpResp := httptest.NewRecorder() ctx := user.InjectOrgID(httpReq.Context(), "foo") httpReq = httpReq.WithContext(ctx) - f.SearchHandler.ServeHTTP(httpResp, httpReq) + f.SearchTagsValuesV2Handler.ServeHTTP(httpResp, httpReq) require.Equal(t, tc.expectedMessage, httpResp.Body.String()) require.Equal(t, tc.expectedCode, httpResp.Code) diff --git a/modules/ingester/ingester_test.go b/modules/ingester/ingester_test.go index f9d40b5aa52..5ab3418a50e 100644 --- a/modules/ingester/ingester_test.go +++ b/modules/ingester/ingester_test.go @@ -235,12 +235,10 @@ func TestSearchWAL(t *testing.T) { // search WAL ctx := user.InjectOrgID(context.Background(), "test") - searchReq := &tempopb.SearchRequest{Tags: map[string]string{ - "foo": "bar", - }} + searchReq := &tempopb.SearchRequest{Query: "{ }"} results, err := inst.Search(ctx, searchReq) require.NoError(t, err) - require.Equal(t, uint32(1), results.Metrics.InspectedTraces) + require.Equal(t, 1, len(results.Traces)) // Shutdown require.NoError(t, i.stopping(nil)) @@ -256,7 +254,7 @@ func TestSearchWAL(t *testing.T) { results, err = inst.Search(ctx, searchReq) require.NoError(t, err) - require.Equal(t, uint32(1), results.Metrics.InspectedTraces) + require.Equal(t, 1, len(results.Traces)) } // TODO - This test is flaky and commented out until it's fixed diff --git a/modules/ingester/instance_search.go b/modules/ingester/instance_search.go index 040d19afcb3..6d88bdf128e 100644 --- a/modules/ingester/instance_search.go +++ b/modules/ingester/instance_search.go @@ -40,9 +40,19 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem span.AddEvent("SearchRequest", trace.WithAttributes(attribute.String("request", req.String()))) + rootExpr, err := traceql.Parse(req.Query) + if err != nil { + return nil, fmt.Errorf("error parsing query: %w", err) + } + + var mostRecent, ok bool + if mostRecent, ok = rootExpr.Hints.GetBool(traceql.HintMostRecent, false); !ok { + mostRecent = false + } + var ( resultsMtx = sync.Mutex{} - combiner = traceql.NewMetadataCombiner(maxResults) + combiner = traceql.NewMetadataCombiner(maxResults, mostRecent) metrics = &tempopb.SearchMetrics{} opts = common.DefaultSearchOptions() anyErr atomic.Error @@ -58,12 +68,8 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem var resp *tempopb.SearchResponse var err error - // if blocks end time < the oldest entry in the combiner we can ignore b/c its impossible for a trace in this block - // to be more recent than our current results - // if endtime = 0 in means the block is not yet completed and we should always search it - if combiner.Count() >= maxResults && - !blockMeta.EndTime.IsZero() && - blockMeta.EndTime.Unix() < int64(combiner.OldestTimestampNanos()) { + // if the combiner is complete for the 
block's end time, we can skip searching it + if combiner.IsCompleteFor(uint32(blockMeta.EndTime.Unix())) { return } @@ -105,6 +111,10 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem for _, tr := range resp.Traces { combiner.AddMetadata(tr) + if combiner.IsCompleteFor(traceql.TimestampNever) { + cancel() + return + } } } @@ -124,12 +134,6 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem if err := anyErr.Load(); err != nil { return nil, err } - if combiner.Count() >= maxResults { - return &tempopb.SearchResponse{ - Traces: combiner.Metadata(), - Metrics: metrics, - }, nil - } // Search all other blocks (concurrently) // Lock blocks mutex until all search tasks are finished and this function exits. This avoids diff --git a/modules/ingester/instance_search_test.go b/modules/ingester/instance_search_test.go index cc91b3b4b27..74d17e236e1 100644 --- a/modules/ingester/instance_search_test.go +++ b/modules/ingester/instance_search_test.go @@ -43,9 +43,8 @@ func TestInstanceSearch(t *testing.T) { ids, _, _, _ := writeTracesForSearch(t, i, "", tagKey, tagValue, false, false) req := &tempopb.SearchRequest{ - Tags: map[string]string{}, + Query: fmt.Sprintf(`{ span.%s = "%s" }`, tagKey, tagValue), } - req.Tags[tagKey] = tagValue req.Limit = uint32(len(ids)) + 1 // Test after appending to WAL. writeTracesforSearch() makes sure all traces are in the wal @@ -188,27 +187,23 @@ func TestInstanceSearchWithStartAndEnd(t *testing.T) { searchAndAssert := func(req *tempopb.SearchRequest, inspectedTraces uint32) { sr := search(req, 0, 0) assert.Len(t, sr.Traces, len(ids)) - assert.Equal(t, sr.Metrics.InspectedTraces, inspectedTraces) checkEqual(t, ids, sr) // writeTracesForSearch will build spans that end 1 second from now // query 2 min range to have extra slack and always be within range sr = search(req, uint32(time.Now().Add(-5*time.Minute).Unix()), uint32(time.Now().Add(5*time.Minute).Unix())) assert.Len(t, sr.Traces, len(ids)) - assert.Equal(t, sr.Metrics.InspectedTraces, inspectedTraces) checkEqual(t, ids, sr) // search with start=5m from now, end=10m from now sr = search(req, uint32(time.Now().Add(5*time.Minute).Unix()), uint32(time.Now().Add(10*time.Minute).Unix())) // no results and should inspect 100 traces in wal assert.Len(t, sr.Traces, 0) - assert.Equal(t, uint32(0), sr.Metrics.InspectedTraces) } req := &tempopb.SearchRequest{ - Tags: map[string]string{}, + Query: fmt.Sprintf(`{ span.%s = "%s" }`, tagKey, tagValue), } - req.Tags[tagKey] = tagValue req.Limit = uint32(len(ids)) + 1 // Test after appending to WAL. 
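
A note on the request migration in the hunks above and below: SearchRequest.Tags is replaced with a real TraceQL expression because the ingester now parses req.Query up front to look for the most_recent hint before picking a combiner. A sketch of the two query shapes involved; treating "with (most_recent=true)" as the hint syntax is an assumption based on the hint name this series adds, not settled API:

    package main

    import "fmt"

    func main() {
        tagKey, tagValue := "foo", "bar"

        // the old tag-map filters are now expressed as TraceQL,
        // as in the test changes above
        q := fmt.Sprintf(`{ span.%s = "%s" }`, tagKey, tagValue)
        fmt.Println(q) // { span.foo = "bar" }

        // assumed opt-in for most-recent ordering via the query hint
        fmt.Println(q + ` with (most_recent=true)`)
    }
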
@@ -612,7 +607,7 @@ func TestInstanceSearchNoData(t *testing.T) { i, _ := defaultInstance(t) req := &tempopb.SearchRequest{ - Tags: map[string]string{}, + Query: "{}", } sr, err := i.Search(context.Background(), req) @@ -634,7 +629,7 @@ func TestInstanceSearchDoesNotRace(t *testing.T) { tagValue := "bar" req := &tempopb.SearchRequest{ - Tags: map[string]string{tagKey: tagValue}, + Query: fmt.Sprintf(`{ span.%s = "%s" }`, tagKey, tagValue), } end := make(chan struct{}) @@ -764,11 +759,7 @@ func TestWALBlockDeletedDuringSearch(t *testing.T) { go concurrent(func() { _, err := i.Search(context.Background(), &tempopb.SearchRequest{ - Tags: map[string]string{ - // Not present in the data, so it will be an exhaustive - // search - "wuv": "xyz", - }, + Query: `{ span.wuv = "xyz" }`, }) require.NoError(t, err) }) @@ -811,7 +802,7 @@ func TestInstanceSearchMetrics(t *testing.T) { search := func() *tempopb.SearchMetrics { sr, err := i.Search(context.Background(), &tempopb.SearchRequest{ - Tags: map[string]string{"foo": "bar"}, + Query: fmt.Sprintf(`{ span.%s = "%s" }`, "foo", "bar"), }) require.NoError(t, err) return sr.Metrics @@ -826,14 +817,12 @@ func TestInstanceSearchMetrics(t *testing.T) { err := i.CutCompleteTraces(0, true) require.NoError(t, err) m = search() - require.Equal(t, numTraces, m.InspectedTraces) require.Less(t, numBytes, m.InspectedBytes) // Test after cutting new headblock blockID, err := i.CutBlockIfReady(0, 0, true) require.NoError(t, err) m = search() - require.Equal(t, numTraces, m.InspectedTraces) require.Less(t, numBytes, m.InspectedBytes) // Test after completing a block @@ -842,7 +831,7 @@ func TestInstanceSearchMetrics(t *testing.T) { err = i.ClearCompletingBlock(blockID) require.NoError(t, err) m = search() - require.Equal(t, numTraces, m.InspectedTraces) + require.Less(t, numBytes, m.InspectedBytes) } func BenchmarkInstanceSearchUnderLoad(b *testing.B) { diff --git a/pkg/traceql/combine.go b/pkg/traceql/combine.go index 73611280c99..1c7ab688a06 100644 --- a/pkg/traceql/combine.go +++ b/pkg/traceql/combine.go @@ -1,7 +1,9 @@ package traceql import ( + "math" "slices" + "sort" "strings" "time" @@ -9,23 +11,120 @@ import ( "github.com/grafana/tempo/pkg/util" ) -type MetadataCombiner struct { +type MetadataCombiner interface { + AddMetadata(new *tempopb.TraceSearchMetadata) bool + IsCompleteFor(ts uint32) bool + + Metadata() []*tempopb.TraceSearchMetadata + MetadataAfter(ts uint32) []*tempopb.TraceSearchMetadata + + addSpanset(new *Spanset) +} + +const TimestampNever = uint32(math.MaxUint32) +const TimestampAllTime = uint32(1) + +func NewMetadataCombiner(limit int, keepMostRecent bool) MetadataCombiner { + if keepMostRecent { + return newMostRecentCombiner(limit) + } + + return newAnyCombiner(limit) +} + +type anyCombiner struct { + trs map[string]*tempopb.TraceSearchMetadata + limit int +} + +func newAnyCombiner(limit int) *anyCombiner { + return &anyCombiner{ + trs: make(map[string]*tempopb.TraceSearchMetadata, limit), + limit: limit, + } +} + +// addSpanset adds a new spanset to the combiner. 
It only performs the asTraceSearchMetadata
+// conversion if the spanset will be added
+func (c *anyCombiner) addSpanset(new *Spanset) {
+	// else let's see if it's worth converting this to a metadata and adding it
+	// if it's already in the list, then we should add it
+	if _, ok := c.trs[util.TraceIDToHexString(new.TraceID)]; ok {
+		c.AddMetadata(asTraceSearchMetadata(new))
+		return
+	}
+
+	// if we've already hit the limit, don't add a new trace
+	if c.IsCompleteFor(0) {
+		return
+	}
+
+	c.AddMetadata(asTraceSearchMetadata(new))
+}
+
+// AddMetadata adds the new metadata to the map. if it already exists
+// use CombineSearchResults to combine the two
+func (c *anyCombiner) AddMetadata(new *tempopb.TraceSearchMetadata) bool {
+	if existing, ok := c.trs[new.TraceID]; ok {
+		combineSearchResults(existing, new)
+		return true
+	}
+
+	// if we've already hit the limit, don't add a new trace
+	if c.IsCompleteFor(0) {
+		return false
+	}
+
+	c.trs[new.TraceID] = new
+	return true
+}
+
+func (c *anyCombiner) Count() int {
+	return len(c.trs)
+}
+
+func (c *anyCombiner) Exists(id string) bool {
+	_, ok := c.trs[id]
+	return ok
+}
+
+func (c *anyCombiner) IsCompleteFor(_ uint32) bool {
+	return c.Count() >= c.limit && c.limit > 0
+}
+
+func (c *anyCombiner) Metadata() []*tempopb.TraceSearchMetadata {
+	m := make([]*tempopb.TraceSearchMetadata, 0, len(c.trs))
+	for _, tr := range c.trs {
+		m = append(m, tr)
+	}
+	sort.Slice(m, func(i, j int) bool {
+		return m[i].StartTimeUnixNano > m[j].StartTimeUnixNano
+	})
+	return m
+}
+
+// MetadataAfter returns all traces that started after the given time. anyCombiner has no concept of time so it just returns all traces
+func (c *anyCombiner) MetadataAfter(_ uint32) []*tempopb.TraceSearchMetadata {
+	return c.Metadata()
+}
+
+type mostRecentCombiner struct {
 	trs            map[string]*tempopb.TraceSearchMetadata
 	trsSorted      []*tempopb.TraceSearchMetadata
 	keepMostRecent int
 }
 
-func NewMetadataCombiner(keepMostRecent int) *MetadataCombiner {
-	return &MetadataCombiner{
-		trs:            make(map[string]*tempopb.TraceSearchMetadata, keepMostRecent),
-		trsSorted:      make([]*tempopb.TraceSearchMetadata, 0, keepMostRecent),
-		keepMostRecent: keepMostRecent,
+func newMostRecentCombiner(limit int) *mostRecentCombiner {
+	return &mostRecentCombiner{
+		trs:            make(map[string]*tempopb.TraceSearchMetadata, limit),
+		trsSorted:      make([]*tempopb.TraceSearchMetadata, 0, limit),
+		keepMostRecent: limit,
 	}
 }
 
-// AddSpanset adds a new spanset to the combiner. It only performs the asTraceSearchMetadata
+// addSpanset adds a new spanset to the combiner. It only performs the asTraceSearchMetadata
 // conversion if the spanset will be added
-func (c *MetadataCombiner) AddSpanset(new *Spanset) {
+func (c *mostRecentCombiner) addSpanset(new *Spanset) {
 	// if we're not configured to keep most recent then just add it
 	if c.keepMostRecent == 0 || c.Count() < c.keepMostRecent {
 		c.AddMetadata(asTraceSearchMetadata(new))
@@ -50,7 +149,7 @@ func (c *MetadataCombiner) AddSpanset(new *Spanset) {
 // AddMetadata adds the new metadata to the map. 
if it already exists // use CombineSearchResults to combine the two -func (c *MetadataCombiner) AddMetadata(new *tempopb.TraceSearchMetadata) bool { +func (c *mostRecentCombiner) AddMetadata(new *tempopb.TraceSearchMetadata) bool { if existing, ok := c.trs[new.TraceID]; ok { combineSearchResults(existing, new) return true @@ -80,21 +179,34 @@ func (c *MetadataCombiner) AddMetadata(new *tempopb.TraceSearchMetadata) bool { return true } -func (c *MetadataCombiner) Count() int { +func (c *mostRecentCombiner) Count() int { return len(c.trs) } -func (c *MetadataCombiner) Exists(id string) bool { +func (c *mostRecentCombiner) Exists(id string) bool { _, ok := c.trs[id] return ok } -func (c *MetadataCombiner) Metadata() []*tempopb.TraceSearchMetadata { +// IsCompleteFor returns true if the combiner has reached the limit and all traces are after the given time +func (c *mostRecentCombiner) IsCompleteFor(ts uint32) bool { + if ts == TimestampNever { + return false + } + + if c.Count() < c.keepMostRecent { + return false + } + + return c.OldestTimestampNanos() > uint64(ts)*uint64(time.Second) +} + +func (c *mostRecentCombiner) Metadata() []*tempopb.TraceSearchMetadata { return c.trsSorted } // MetadataAfter returns all traces that started after the given time -func (c *MetadataCombiner) MetadataAfter(afterSeconds uint32) []*tempopb.TraceSearchMetadata { +func (c *mostRecentCombiner) MetadataAfter(afterSeconds uint32) []*tempopb.TraceSearchMetadata { afterNanos := uint64(afterSeconds) * uint64(time.Second) afterTraces := make([]*tempopb.TraceSearchMetadata, 0, len(c.trsSorted)) @@ -107,7 +219,7 @@ func (c *MetadataCombiner) MetadataAfter(afterSeconds uint32) []*tempopb.TraceSe return afterTraces } -func (c *MetadataCombiner) OldestTimestampNanos() uint64 { +func (c *mostRecentCombiner) OldestTimestampNanos() uint64 { if len(c.trsSorted) == 0 { return 0 } diff --git a/pkg/traceql/combine_test.go b/pkg/traceql/combine_test.go index 43d950ba437..809795b738d 100644 --- a/pkg/traceql/combine_test.go +++ b/pkg/traceql/combine_test.go @@ -273,7 +273,7 @@ func TestCombineResults(t *testing.T) { func TestCombinerKeepsMostRecent(t *testing.T) { totalTraces := 10 keepMostRecent := 5 - combiner := NewMetadataCombiner(keepMostRecent) + combiner := NewMetadataCombiner(keepMostRecent, true).(*mostRecentCombiner) // make traces traces := make([]*Spanset, totalTraces) @@ -300,7 +300,7 @@ func TestCombinerKeepsMostRecent(t *testing.T) { // add to combiner for i := 0; i < totalTraces; i++ { - combiner.AddSpanset(traces[i]) + combiner.addSpanset(traces[i]) } // test that the most recent are kept diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go index f2657bc25de..a6570d92400 100644 --- a/pkg/traceql/engine.go +++ b/pkg/traceql/engine.go @@ -55,6 +55,11 @@ func (e *Engine) ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq return nil, err } + var mostRecent, ok bool + if mostRecent, ok = rootExpr.Hints.GetBool(HintMostRecent, false); !ok { + mostRecent = false + } + fetchSpansRequest.StartTimeUnixNanos = unixSecToNano(searchReq.Start) fetchSpansRequest.EndTimeUnixNanos = unixSecToNano(searchReq.End) @@ -111,7 +116,7 @@ func (e *Engine) ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq Traces: nil, Metrics: &tempopb.SearchMetrics{}, } - combiner := NewMetadataCombiner(int(searchReq.Limit)) + combiner := NewMetadataCombiner(int(searchReq.Limit), mostRecent) for { spanset, err := iterator.Next(ctx) if err != nil && !errors.Is(err, io.EOF) { @@ -122,7 +127,10 @@ func (e *Engine) 
ExecuteSearch(ctx context.Context, searchReq *tempopb.SearchReq break } - combiner.AddSpanset(spanset) + combiner.addSpanset(spanset) + if combiner.IsCompleteFor(TimestampNever) { + break + } } res.Traces = combiner.Metadata() diff --git a/pkg/traceql/enum_hints.go b/pkg/traceql/enum_hints.go index e95de1fda93..5e09ffd3b4b 100644 --- a/pkg/traceql/enum_hints.go +++ b/pkg/traceql/enum_hints.go @@ -12,11 +12,12 @@ const ( HintTimeOverlapCutoff = "time_overlap_cutoff" HintConcurrentBlocks = "concurrent_blocks" HintExemplars = "exemplars" + HintMostRecent = "most_recent" // traceql search hint to return most recent results ordered by time ) func isUnsafe(h string) bool { switch h { - case HintSample, HintExemplars: + case HintSample, HintExemplars, HintMostRecent: return false default: return true From 67a3cbb31a8fe3f4b4d6132a0f4f321beb5dcaca Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Fri, 25 Oct 2024 15:45:51 -0400 Subject: [PATCH 3/9] changelog, docs and lint Signed-off-by: Joe Elliott --- CHANGELOG.md | 1 + docs/sources/tempo/traceql/_index.md | 9 +++++++++ modules/frontend/search_sharder.go | 6 ++++-- pkg/traceql/combine.go | 1 - 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e58871a3e44..32d1c662a6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ * [ENHANCEMENT] Speedup collection of results from ingesters in the querier [#4100](https://github.com/grafana/tempo/pull/4100) (@electron0zero) * [ENHANCEMENT] Speedup DistinctValue collector and exit early for ingesters [#4104](https://github.com/grafana/tempo/pull/4104) (@electron0zero) * [ENHANCEMENT] Add disk caching in ingester SearchTagValuesV2 for completed blocks [#4069](https://github.com/grafana/tempo/pull/4069) (@electron0zero) +* [ENHANCEMENT] Added most_recent=true query hint to TraceQL to return most recent results. [#4238](https://github.com/grafana/tempo/pull/4238) (@joe-elliott) * [BUGFIX] Replace hedged requests roundtrips total with a counter. [#4063](https://github.com/grafana/tempo/pull/4063) [#4078](https://github.com/grafana/tempo/pull/4078) (@galalen) * [BUGFIX] Metrics generators: Correctly drop from the ring before stopping ingestion to reduce drops during a rollout. [#4101](https://github.com/grafana/tempo/pull/4101) (@joe-elliott) * [BUGFIX] Correctly handle 400 Bad Request and 404 Not Found in gRPC streaming [#4144](https://github.com/grafana/tempo/pull/4144) (@mapno) diff --git a/docs/sources/tempo/traceql/_index.md b/docs/sources/tempo/traceql/_index.md index cc97015b423..1742f259d2d 100644 --- a/docs/sources/tempo/traceql/_index.md +++ b/docs/sources/tempo/traceql/_index.md @@ -451,6 +451,15 @@ TraceQL can select arbitrary fields from spans. This is particularly performant { status=error } | select(span.http.status_code, span.http.url) ``` +## Retrieving most recent results (experimental) + +The TraceQL query hint `most_recent=true` can be used with any TraceQL selection query to force Tempo to return the most recent results ordered by time. Examples: + +``` +{} with (most_recent=true) +{ span.foo = "bar" } >> { status = error } with (most_recent=true) +``` + ## Experimental TraceQL metrics TraceQL metrics are experimental, but easy to get started with. Refer to [the TraceQL metrics]({{< relref "../operations/traceql-metrics.md" >}}) documentation for more information. 
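The hint plumbing that lands across these patches repeats one pattern in the frontend handler, the ingester, and the engine: parse the query when one is present, read the hint, then pick the combiner implementation. The sketch below condenses that pattern. The wrapper function `combinerFor` and its error message are illustrative; `traceql.Parse`, `Hints.GetBool`, `HintMostRecent`, and `NewMetadataCombiner` are the calls this series actually adds or uses.

```go
package sketch

import (
	"fmt"

	"github.com/grafana/tempo/pkg/tempopb"
	"github.com/grafana/tempo/pkg/traceql"
)

// combinerFor resolves the most_recent hint for a search request and picks
// the combiner implementation. Requests without a TraceQL query (plain tag
// searches) cannot carry the hint and fall back to the unordered combiner.
func combinerFor(req *tempopb.SearchRequest) (traceql.MetadataCombiner, error) {
	mostRecent := false
	if len(req.Query) > 0 {
		rootExpr, err := traceql.Parse(req.Query)
		if err != nil {
			return nil, fmt.Errorf("invalid TraceQL query: %w", err)
		}
		if v, ok := rootExpr.Hints.GetBool(traceql.HintMostRecent, false); ok {
			mostRecent = v
		}
	}
	return traceql.NewMetadataCombiner(int(req.Limit), mostRecent), nil
}
```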
diff --git a/modules/frontend/search_sharder.go b/modules/frontend/search_sharder.go index 16fd6f48fcc..6fe03ad11e1 100644 --- a/modules/frontend/search_sharder.go +++ b/modules/frontend/search_sharder.go @@ -405,8 +405,10 @@ func buildIngesterRequestChan(ctx context.Context, tenantID string, parent pipel return nil } -type shardIterFn func(jobs int, sz uint64, completedThroughTime uint32) -type jobIterFn func(m *backend.BlockMeta, shard, startPage, pages int) +type ( + shardIterFn func(jobs int, sz uint64, completedThroughTime uint32) + jobIterFn func(m *backend.BlockMeta, shard, startPage, pages int) +) // backendJobsFunc provides an iter func with 2 callbacks designed to be used once to calculate job and shard metrics and a second time // to generate actual jobs. diff --git a/pkg/traceql/combine.go b/pkg/traceql/combine.go index 1c7ab688a06..a25b01619e0 100644 --- a/pkg/traceql/combine.go +++ b/pkg/traceql/combine.go @@ -22,7 +22,6 @@ type MetadataCombiner interface { } const TimestampNever = uint32(math.MaxUint32) -const TimestampAllTime = uint32(1) func NewMetadataCombiner(limit int, keepMostRecent bool) MetadataCombiner { if keepMostRecent { From 19c0cab2383db3836789705d632feb96215c1c31 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Fri, 25 Oct 2024 16:22:08 -0400 Subject: [PATCH 4/9] e2e tests - fixed tag search Signed-off-by: Joe Elliott --- modules/frontend/search_handlers.go | 17 ++++++++++------- modules/ingester/instance_search.go | 17 ++++++++++------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/modules/frontend/search_handlers.go b/modules/frontend/search_handlers.go index 0f819d514f3..190b47762b8 100644 --- a/modules/frontend/search_handlers.go +++ b/modules/frontend/search_handlers.go @@ -127,14 +127,17 @@ func newCombiner(req *tempopb.SearchRequest, cfg SearchSharderConfig) (combiner. 
return nil, err } - query, err := traceql.Parse(req.Query) - if err != nil { - return nil, fmt.Errorf("invalid TraceQL query: %s", err) - } + mostRecent := false + if len(req.Query) > 0 { + query, err := traceql.Parse(req.Query) + if err != nil { + return nil, fmt.Errorf("invalid TraceQL query: %s", err) + } - var mostRecent, ok bool - if mostRecent, ok = query.Hints.GetBool(traceql.HintMostRecent, false); !ok { - mostRecent = false + ok := false + if mostRecent, ok = query.Hints.GetBool(traceql.HintMostRecent, false); !ok { + mostRecent = false + } } return combiner.NewTypedSearch(int(limit), mostRecent), nil diff --git a/modules/ingester/instance_search.go b/modules/ingester/instance_search.go index 6d88bdf128e..2e7dfc1c488 100644 --- a/modules/ingester/instance_search.go +++ b/modules/ingester/instance_search.go @@ -40,14 +40,17 @@ func (i *instance) Search(ctx context.Context, req *tempopb.SearchRequest) (*tem span.AddEvent("SearchRequest", trace.WithAttributes(attribute.String("request", req.String()))) - rootExpr, err := traceql.Parse(req.Query) - if err != nil { - return nil, fmt.Errorf("error parsing query: %w", err) - } + mostRecent := false + if len(req.Query) > 0 { + rootExpr, err := traceql.Parse(req.Query) + if err != nil { + return nil, fmt.Errorf("error parsing query: %w", err) + } - var mostRecent, ok bool - if mostRecent, ok = rootExpr.Hints.GetBool(traceql.HintMostRecent, false); !ok { - mostRecent = false + ok := false + if mostRecent, ok = rootExpr.Hints.GetBool(traceql.HintMostRecent, false); !ok { + mostRecent = false + } } var ( From 2d1c5a994e9cbba6bea4d6ee2dc9b8bfa4692e40 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Fri, 25 Oct 2024 17:15:53 -0400 Subject: [PATCH 5/9] lint Signed-off-by: Joe Elliott --- modules/frontend/search_sharder_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/frontend/search_sharder_test.go b/modules/frontend/search_sharder_test.go index 3aeb2be7dca..509ce8a432b 100644 --- a/modules/frontend/search_sharder_test.go +++ b/modules/frontend/search_sharder_test.go @@ -979,7 +979,7 @@ func TestBackendShards(t *testing.T) { fn := backendJobsFunc(metas, defaultTargetBytesPerRequest, tc.maxShards, tc.searchEnd) actualShards := []combiner.SearchShards{} - fn(func(jobs int, sz uint64, completedThroughTime uint32) { + fn(func(jobs int, _ uint64, completedThroughTime uint32) { actualShards = append(actualShards, combiner.SearchShards{ TotalJobs: uint32(jobs), CompletedThroughSeconds: completedThroughTime, From 4bd00bf7203ee339b059d69aab92703c73d4b098 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Mon, 28 Oct 2024 08:03:47 -0400 Subject: [PATCH 6/9] remove clone changes Signed-off-by: Joe Elliott --- .../frontend/pipeline/async_handler_multitenant.go | 12 +++++------- modules/frontend/pipeline/pipeline.go | 9 --------- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/modules/frontend/pipeline/async_handler_multitenant.go b/modules/frontend/pipeline/async_handler_multitenant.go index c8c2f6bd4d2..984787d7113 100644 --- a/modules/frontend/pipeline/async_handler_multitenant.go +++ b/modules/frontend/pipeline/async_handler_multitenant.go @@ -60,15 +60,13 @@ func (t *tenantRoundTripper) RoundTrip(req Request) (Responses[combiner.Pipeline // requestForTenant makes a copy of request and injects the tenant id into context and Header. 
// this allows us to keep all multi-tenant logic in query frontend and keep other components single tenant func requestForTenant(req Request, tenant string) Request { - clonedR := req.Clone() - - r := clonedR.HTTPRequest() - r.Header.Set(user.OrgIDHeaderName, tenant) - + r := req.HTTPRequest() ctx := r.Context() - clonedR.WithContext(user.InjectOrgID(ctx, tenant)) - return clonedR + ctx = user.InjectOrgID(ctx, tenant) + rCopy := r.Clone(ctx) + rCopy.Header.Set(user.OrgIDHeaderName, tenant) + return NewHTTPRequest(rCopy) } type unsupportedRoundTripper struct { diff --git a/modules/frontend/pipeline/pipeline.go b/modules/frontend/pipeline/pipeline.go index a2e18cb3ae1..920cd4eb203 100644 --- a/modules/frontend/pipeline/pipeline.go +++ b/modules/frontend/pipeline/pipeline.go @@ -14,7 +14,6 @@ type Request interface { HTTPRequest() *http.Request Context() context.Context WithContext(context.Context) - Clone() Request Weight() int SetWeight(int) @@ -51,14 +50,6 @@ func (r HTTPRequest) Context() context.Context { return r.req.Context() } -func (r *HTTPRequest) Clone() Request { - return &HTTPRequest{ - req: r.req.Clone(r.req.Context()), - cacheKey: r.cacheKey, - responseData: r.responseData, - } -} - func (r *HTTPRequest) WithContext(ctx context.Context) { r.req = r.req.WithContext(ctx) } From f09ea566b83075fbfc922132fe37471632a64d01 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Thu, 7 Nov 2024 11:00:48 -0500 Subject: [PATCH 7/9] review Signed-off-by: Joe Elliott --- modules/frontend/combiner/search.go | 30 ++++++++++++------------- modules/frontend/search_sharder.go | 10 ++++----- modules/frontend/search_sharder_test.go | 4 +++- pkg/traceql/combine.go | 1 - 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/modules/frontend/combiner/search.go b/modules/frontend/combiner/search.go index 6f164714590..f4dca2cddde 100644 --- a/modules/frontend/combiner/search.go +++ b/modules/frontend/combiner/search.go @@ -176,8 +176,7 @@ func (s *ShardCompletionTracker) addShards(shards []SearchShards) uint32 { s.foundResponses = temp } - for s.incrementCurShardIfComplete() { - } + s.incrementCurShardIfComplete() return s.completedThroughSeconds } @@ -205,23 +204,24 @@ func (s *ShardCompletionTracker) addShardIdx(shardIdx int) uint32 { } s.foundResponses[shardIdx]++ - for s.incrementCurShardIfComplete() { - } + s.incrementCurShardIfComplete() return s.completedThroughSeconds } -func (s *ShardCompletionTracker) incrementCurShardIfComplete() bool { - if s.curShard >= len(s.shards) { - return false - } - - if s.foundResponses[s.curShard] == int(s.shards[s.curShard].TotalJobs) { - s.completedThroughSeconds = s.shards[s.curShard].CompletedThroughSeconds - s.curShard++ +// incrementCurShardIfComplete tests to see if the current shard is complete and increments it if so. +// it does this repeatedly until it finds a shard that is not complete. 
+func (s *ShardCompletionTracker) incrementCurShardIfComplete() { + for { + if s.curShard >= len(s.shards) { + break + } - return true + if s.foundResponses[s.curShard] == int(s.shards[s.curShard].TotalJobs) { + s.completedThroughSeconds = s.shards[s.curShard].CompletedThroughSeconds + s.curShard++ + } else { + break + } } - - return false } diff --git a/modules/frontend/search_sharder.go b/modules/frontend/search_sharder.go index d7b39e5d25f..1bdb5c36e9a 100644 --- a/modules/frontend/search_sharder.go +++ b/modules/frontend/search_sharder.go @@ -105,7 +105,7 @@ func (s asyncSearchSharder) RoundTrip(pipelineRequest pipeline.Request) (pipelin } // pass subCtx in requests so we can cancel and exit early - jobMetrics = s.backendRequests(ctx, tenantID, pipelineRequest, searchReq, jobMetrics, reqCh, func(err error) { + s.backendRequests(ctx, tenantID, pipelineRequest, searchReq, jobMetrics, reqCh, func(err error) { // todo: actually find a way to return this error to the user s.logger.Log("msg", "search: failed to build backend requests", "err", err) }) @@ -136,11 +136,11 @@ func (s *asyncSearchSharder) blockMetas(start, end uint32, tenantID string) []*b // backendRequest builds backend requests to search backend blocks. backendRequest takes ownership of reqCh and closes it. // it returns 3 int values: totalBlocks, totalBlockBytes, and estimated jobs -func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, resp *combiner.SearchJobResponse, reqCh chan<- pipeline.Request, errFn func(error)) *combiner.SearchJobResponse { +func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq *tempopb.SearchRequest, resp *combiner.SearchJobResponse, reqCh chan<- pipeline.Request, errFn func(error)) { // request without start or end, search only in ingester if searchReq.Start == 0 || searchReq.End == 0 { close(reqCh) - return resp + return } // calculate duration (start and end) to search the backend blocks @@ -149,7 +149,7 @@ func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID strin // no need to search backend if start == end { close(reqCh) - return resp + return } blocks := s.blockMetas(start, end, tenantID) @@ -171,8 +171,6 @@ func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID strin go func() { buildBackendRequests(ctx, tenantID, parent, searchReq, blockIter, reqCh, errFn) }() - - return resp } // ingesterRequest returns a new start and end time range for the backend as well as an http request diff --git a/modules/frontend/search_sharder_test.go b/modules/frontend/search_sharder_test.go index b6b9f27943f..4f85ffe733e 100644 --- a/modules/frontend/search_sharder_test.go +++ b/modules/frontend/search_sharder_test.go @@ -319,7 +319,9 @@ func TestBackendRequests(t *testing.T) { ctx, cancelCause := context.WithCancelCause(context.Background()) pipelineRequest := pipeline.NewHTTPRequest(r) - searchJobResponse := s.backendRequests(ctx, "test", pipelineRequest, searchReq, &combiner.SearchJobResponse{}, reqCh, cancelCause) + + searchJobResponse := &combiner.SearchJobResponse{} + s.backendRequests(ctx, "test", pipelineRequest, searchReq, searchJobResponse, reqCh, cancelCause) require.Equal(t, tc.expectedJobs, searchJobResponse.TotalJobs) require.Equal(t, tc.expectedBlocks, searchJobResponse.TotalBlocks) require.Equal(t, tc.expectedBlockBytes, searchJobResponse.TotalBytes) diff --git a/pkg/traceql/combine.go 
b/pkg/traceql/combine.go
index a25b01619e0..eda545df297 100644
--- a/pkg/traceql/combine.go
+++ b/pkg/traceql/combine.go
@@ -46,7 +46,6 @@ func newAnyCombiner(limit int) *anyCombiner {
 // addSpanset adds a new spanset to the combiner. It only performs the asTraceSearchMetadata
 // conversion if the spanset will be added
 func (c *anyCombiner) addSpanset(new *Spanset) {
-	// else let's see if it's worth converting this to a metadata and adding it
 	// if it's already in the list, then we should add it
 	if _, ok := c.trs[util.TraceIDToHexString(new.TraceID)]; ok {
 		c.AddMetadata(asTraceSearchMetadata(new))

From d5cc9f44511187f680f074686435b2bc06bb3846 Mon Sep 17 00:00:00 2001
From: Joe Elliott
Date: Thu, 7 Nov 2024 11:58:26 -0500
Subject: [PATCH 8/9] make shards configurable

Signed-off-by: Joe Elliott
---
 docs/sources/tempo/configuration/_index.md |  8 +++--
 modules/frontend/config.go                 |  1 +
 modules/frontend/frontend.go               |  4 +++
 modules/frontend/frontend_test.go          | 35 +++++++++++++++++++++-
 modules/frontend/search_handlers_test.go   |  4 +++
 modules/frontend/search_sharder.go         |  8 ++---
 modules/frontend/search_sharder_test.go    |  9 ++++--
 modules/frontend/tag_handlers_test.go      |  4 +++
 modules/frontend/tag_sharder_test.go       |  2 ++
 modules/frontend/traceid_handlers_test.go  |  1 +
 10 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/docs/sources/tempo/configuration/_index.md b/docs/sources/tempo/configuration/_index.md
index 3e48575f5e6..57d3a5e34fd 100644
--- a/docs/sources/tempo/configuration/_index.md
+++ b/docs/sources/tempo/configuration/_index.md
@@ -639,8 +639,12 @@ query_frontend:
     [throughput_bytes_slo: <float> | default = 0 ]
 
     # The number of shards to break ingester queries into.
-    [ingester_shards]: <int> | default = 1]
-
+    [ingester_shards: <int> | default = 1]
+
+    # The number of time windows to break a search into when running a most recent TraceQL search. This only impacts
+    # TraceQL searches with (most_recent=true).
+    [most_recent_shards: <int> | default = 200]
+
     # SLO configuration for Metadata (tags and tag values) endpoints.
     metadata_slo:
         # If set to a non-zero value, it's value will be used to decide if metadata query is within SLO or not.
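For operators, the new knob sits next to `ingester_shards`. A minimal config sketch follows (the `query_frontend.search` nesting is assumed from the docs above; the values shown are the defaults):

```yaml
query_frontend:
  search:
    # number of shards to break ingester queries into
    ingester_shards: 1
    # number of time windows used to stream ordered partial results; only
    # consulted by queries that carry the most_recent=true hint
    most_recent_shards: 200
```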
diff --git a/modules/frontend/config.go b/modules/frontend/config.go index 0ba194df4b2..35e7c2b7a0a 100644 --- a/modules/frontend/config.go +++ b/modules/frontend/config.go @@ -79,6 +79,7 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(string, *flag.FlagSet) { ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, IngesterShards: 1, + MostRecentShards: defaultMostRecentShards, }, SLO: slo, } diff --git a/modules/frontend/frontend.go b/modules/frontend/frontend.go index 9b6a472f6b7..7315849f264 100644 --- a/modules/frontend/frontend.go +++ b/modules/frontend/frontend.go @@ -78,6 +78,10 @@ func New(cfg Config, next pipeline.RoundTripper, o overrides.Interface, reader t return nil, fmt.Errorf("query backend after should be less than or equal to query ingester until") } + if cfg.Search.Sharder.MostRecentShards <= 0 { + return nil, fmt.Errorf("most recent shards must be greater than 0") + } + if cfg.Metrics.Sharder.ConcurrentRequests <= 0 { return nil, fmt.Errorf("frontend metrics concurrent requests should be greater than 0") } diff --git a/modules/frontend/frontend_test.go b/modules/frontend/frontend_test.go index d21214af70e..7d0f23bf199 100644 --- a/modules/frontend/frontend_test.go +++ b/modules/frontend/frontend_test.go @@ -26,6 +26,7 @@ func TestFrontendTagSearchRequiresOrgID(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -69,12 +70,12 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, }, nil, nil, nil, nil, "", log.NewNopLogger(), nil) assert.EqualError(t, err, "frontend query shards should be between 2 and 100000 (both inclusive)") - assert.Nil(t, f) f, err = New(Config{ @@ -86,6 +87,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -102,6 +104,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: 0, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -118,6 +121,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: 0, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -134,6 +138,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, QueryIngestersUntil: time.Minute, QueryBackendAfter: time.Hour, }, @@ -151,6 +156,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -166,6 +172,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + 
MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -188,6 +195,7 @@ func TestFrontendBadConfigFails(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -202,4 +210,29 @@ func TestFrontendBadConfigFails(t *testing.T) { }, nil, nil, nil, nil, "", log.NewNopLogger(), nil) assert.EqualError(t, err, "frontend metrics interval should be greater than 0") assert.Nil(t, f) + + f, err = New(Config{ + TraceByID: TraceByIDConfig{ + QueryShards: maxQueryShards, + }, + Search: SearchConfig{ + Sharder: SearchSharderConfig{ + ConcurrentRequests: defaultConcurrentRequests, + TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: 0, + }, + SLO: testSLOcfg, + }, + Metrics: MetricsConfig{ + Sharder: QueryRangeSharderConfig{ + ConcurrentRequests: defaultConcurrentRequests, + TargetBytesPerRequest: defaultTargetBytesPerRequest, + Interval: 5 * time.Minute, + }, + SLO: testSLOcfg, + }, + }, nil, nil, nil, nil, "", log.NewNopLogger(), nil) + assert.EqualError(t, err, "most recent shards must be greater than 0") + assert.Nil(t, f) + } diff --git a/modules/frontend/search_handlers_test.go b/modules/frontend/search_handlers_test.go index 81e0e0c82ce..7e808ba7a65 100644 --- a/modules/frontend/search_handlers_test.go +++ b/modules/frontend/search_handlers_test.go @@ -326,6 +326,7 @@ func TestSearchLimitHonored(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, DefaultLimit: 10, MaxLimit: 15, }, @@ -489,6 +490,7 @@ func TestSearchFailurePropagatesFromQueriers(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -534,6 +536,7 @@ func TestSearchFailurePropagatesFromQueriers(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -780,6 +783,7 @@ func frontendWithSettings(t require.TestingT, next pipeline.RoundTripper, rdr te Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, diff --git a/modules/frontend/search_sharder.go b/modules/frontend/search_sharder.go index 1bdb5c36e9a..4f3d8cfff52 100644 --- a/modules/frontend/search_sharder.go +++ b/modules/frontend/search_sharder.go @@ -25,8 +25,7 @@ import ( const ( defaultTargetBytesPerRequest = 100 * 1024 * 1024 defaultConcurrentRequests = 1000 - // every search is broken into the same number of search shards. 
these shards are used to stream partial results back that are guaranteed to be latest results - maxSearchShards = 200 + defaultMostRecentShards = 200 ) type SearchSharderConfig struct { @@ -38,6 +37,7 @@ type SearchSharderConfig struct { QueryBackendAfter time.Duration `yaml:"query_backend_after,omitempty"` QueryIngestersUntil time.Duration `yaml:"query_ingesters_until,omitempty"` IngesterShards int `yaml:"ingester_shards,omitempty"` + MostRecentShards int `yaml:"most_recent_shards,omitempty"` } type asyncSearchSharder struct { @@ -157,7 +157,7 @@ func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID strin // calculate metrics to return to the caller resp.TotalBlocks = len(blocks) - blockIter := backendJobsFunc(blocks, s.cfg.TargetBytesPerRequest, maxSearchShards, searchReq.End) + blockIter := backendJobsFunc(blocks, s.cfg.TargetBytesPerRequest, s.cfg.MostRecentShards, searchReq.End) blockIter(func(jobs int, sz uint64, completedThroughTime uint32) { resp.TotalJobs += jobs resp.TotalBytes += sz @@ -179,7 +179,7 @@ func (s *asyncSearchSharder) backendRequests(ctx context.Context, tenantID strin // unexpectedly changing the passed searchReq. func (s *asyncSearchSharder) ingesterRequests(tenantID string, parent pipeline.Request, searchReq tempopb.SearchRequest, reqCh chan pipeline.Request) (*combiner.SearchJobResponse, error) { resp := &combiner.SearchJobResponse{ - Shards: make([]combiner.SearchShards, 0, maxSearchShards+1), // +1 for the ingester shard + Shards: make([]combiner.SearchShards, 0, s.cfg.MostRecentShards+1), // +1 for the ingester shard } // request without start or end, search only in ingester diff --git a/modules/frontend/search_sharder_test.go b/modules/frontend/search_sharder_test.go index 4f85ffe733e..a0951ed7852 100644 --- a/modules/frontend/search_sharder_test.go +++ b/modules/frontend/search_sharder_test.go @@ -225,7 +225,7 @@ func TestBuildBackendRequests(t *testing.T) { ctx, cancelCause := context.WithCancelCause(context.Background()) reqCh := make(chan pipeline.Request) - iterFn := backendJobsFunc(tc.metas, tc.targetBytesPerRequest, maxSearchShards, math.MaxUint32) + iterFn := backendJobsFunc(tc.metas, tc.targetBytesPerRequest, defaultMostRecentShards, math.MaxUint32) go func() { buildBackendRequests(ctx, "test", pipeline.NewHTTPRequest(req), searchReq, iterFn, reqCh, cancelCause) @@ -252,7 +252,9 @@ func TestBackendRequests(t *testing.T) { bm.TotalRecords = 2 s := &asyncSearchSharder{ - cfg: SearchSharderConfig{}, + cfg: SearchSharderConfig{ + MostRecentShards: defaultMostRecentShards, + }, reader: &mockReader{metas: []*backend.BlockMeta{bm}}, } @@ -682,6 +684,7 @@ func TestTotalJobsIncludesIngester(t *testing.T) { QueryIngestersUntil: 15 * time.Minute, ConcurrentRequests: 1, // 1 concurrent request to force order TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, IngesterShards: 1, }, log.NewNopLogger()) testRT := sharder.Wrap(next) @@ -726,6 +729,7 @@ func TestSearchSharderRoundTripBadRequest(t *testing.T) { sharder := newAsyncSearchSharder(&mockReader{}, o, SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, MaxDuration: 5 * time.Minute, }, log.NewNopLogger()) testRT := sharder.Wrap(next) @@ -759,6 +763,7 @@ func TestSearchSharderRoundTripBadRequest(t *testing.T) { sharder = newAsyncSearchSharder(&mockReader{}, o, SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, 
TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, MaxDuration: 5 * time.Minute, }, log.NewNopLogger()) testRT = sharder.Wrap(next) diff --git a/modules/frontend/tag_handlers_test.go b/modules/frontend/tag_handlers_test.go index 84b67b3e54c..4054c0619f1 100644 --- a/modules/frontend/tag_handlers_test.go +++ b/modules/frontend/tag_handlers_test.go @@ -233,6 +233,7 @@ func TestSearchTagsV2FailurePropagatesFromQueriers(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -278,6 +279,7 @@ func TestSearchTagsV2FailurePropagatesFromQueriers(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -364,6 +366,7 @@ func TestSearchTagValuesV2FailurePropagatesFromQueriers(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, @@ -410,6 +413,7 @@ func TestSearchTagValuesV2FailurePropagatesFromQueriers(t *testing.T) { Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, diff --git a/modules/frontend/tag_sharder_test.go b/modules/frontend/tag_sharder_test.go index e47fa87f35c..251a137f7fd 100644 --- a/modules/frontend/tag_sharder_test.go +++ b/modules/frontend/tag_sharder_test.go @@ -294,6 +294,7 @@ func TestTagsSearchSharderRoundTripBadRequest(t *testing.T) { sharder := newAsyncTagSharder(&mockReader{}, o, SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, MaxDuration: 5 * time.Minute, }, parseTagsRequest, log.NewNopLogger()) testRT := sharder.Wrap(next) @@ -328,6 +329,7 @@ func TestTagsSearchSharderRoundTripBadRequest(t *testing.T) { sharder = newAsyncTagSharder(&mockReader{}, o, SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, MaxDuration: 5 * time.Minute, }, parseTagsRequest, log.NewNopLogger()) testRT = sharder.Wrap(next) diff --git a/modules/frontend/traceid_handlers_test.go b/modules/frontend/traceid_handlers_test.go index 69e16abe036..a1d773769de 100644 --- a/modules/frontend/traceid_handlers_test.go +++ b/modules/frontend/traceid_handlers_test.go @@ -33,6 +33,7 @@ var config = &Config{ Sharder: SearchSharderConfig{ ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, + MostRecentShards: defaultMostRecentShards, }, SLO: testSLOcfg, }, From 2206c891e2a4865ab2356bb582d9c40761ac5a77 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Thu, 7 Nov 2024 12:02:04 -0500 Subject: [PATCH 9/9] dont mess with me lint. 
i will uninstall you Signed-off-by: Joe Elliott --- modules/frontend/frontend_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/frontend/frontend_test.go b/modules/frontend/frontend_test.go index 7d0f23bf199..0713f332435 100644 --- a/modules/frontend/frontend_test.go +++ b/modules/frontend/frontend_test.go @@ -234,5 +234,4 @@ func TestFrontendBadConfigFails(t *testing.T) { }, nil, nil, nil, nil, "", log.NewNopLogger(), nil) assert.EqualError(t, err, "most recent shards must be greater than 0") assert.Nil(t, f) - }
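Taken together, every early exit this series adds (ingester block skipping, engine iteration, frontend streaming) funnels through one predicate. The snippet below restates `mostRecentCombiner.IsCompleteFor` from `pkg/traceql/combine.go` as a free function to make the invariant explicit; the standalone form and the sample values are illustrative, not code from the patch.

```go
package main

import (
	"fmt"
	"math"
	"time"
)

// TimestampNever mirrors the sentinel in pkg/traceql/combine.go: a search is
// never "complete through never", so the check must answer false.
const TimestampNever = uint32(math.MaxUint32)

// isCompleteFor reports whether a most_recent search can stop early: the
// combiner must be full, and its oldest kept trace must start after the
// timestamp the search has completely covered, so no block that remains
// unsearched can displace a result that is already held.
func isCompleteFor(count, limit int, oldestStartNanos uint64, completedThroughSec uint32) bool {
	if completedThroughSec == TimestampNever {
		return false
	}
	if count < limit {
		return false
	}
	return oldestStartNanos > uint64(completedThroughSec)*uint64(time.Second)
}

func main() {
	// full combiner, oldest kept trace started at t=200s, and all blocks are
	// searched through t=150s: nothing older can enter the results.
	fmt.Println(isCompleteFor(20, 20, 200_000_000_000, 150)) // true

	// same state, but nothing is fully covered yet: keep searching.
	fmt.Println(isCompleteFor(20, 20, 200_000_000_000, TimestampNever)) // false
}
```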