Skip to content

Commit

Permalink
TraceQL metrics time range fixes (#4325)
Browse files Browse the repository at this point in the history
* Disconnect job time range filtering from step, so that results in split backend/recent range is accurate

* changelog
  • Loading branch information
mdisibio authored Nov 15, 2024
1 parent d730dfc commit 344743d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 26 deletions.
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,19 @@
* [ENHANCEMENT] chore: remove gofakeit dependency [#4274](https://github.com/grafana/tempo/pull/4274) (@javiermolinar)
* [ENHANCEMENT] Add a max flush attempts and metric to the metrics generator [#4254](https://github.com/grafana/tempo/pull/4254) (@joe-elliott)
* [ENHANCEMENT] Collection of query-frontend changes to reduce allocs. [#4242](https://github.com/grafana/tempo/pull/4242) (@joe-elliott)
* [ENHANCEMENT] Added `insecure-skip-verify` option in tempo-cli to skip SSL certificate validation when connecting to the S3 backend. [#44236](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)
* [ENHANCEMENT] Added `insecure-skip-verify` option in tempo-cli to skip SSL certificate validation when connecting to the S3 backend. [#4259](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)
* [ENHANCEMENT] Chore: delete spanlogger. [4312](https://github.com/grafana/tempo/pull/4312) (@javiermolinar)
* [ENHANCEMENT] Add `invalid_utf8` to reasons spanmetrics will discard spans. [#4293](https://github.com/grafana/tempo/pull/4293) (@zalegrala)
* [ENHANCEMENT] Reduce frontend and querier allocations by dropping HTTP headers early in the pipeline. [#4298](https://github.com/grafana/tempo/pull/4298) (@joe-elliott)
* [BUGFIX] Replace hedged requests roundtrips total with a counter. [#4063](https://github.com/grafana/tempo/pull/4063) [#4078](https://github.com/grafana/tempo/pull/4078) (@galalen)
* [BUGFIX] Metrics generators: Correctly drop from the ring before stopping ingestion to reduce drops during a rollout. [#4101](https://github.com/grafana/tempo/pull/4101) (@joe-elliott)
* [BUGFIX] Correctly handle 400 Bad Request and 404 Not Found in gRPC streaming [#4144](https://github.com/grafana/tempo/pull/4144) (@mapno)
* [BUGFIX] Pushes a 0 to classic histogram's counter when the series is new to allow Prometheus to start from a non-null value. [#4140](https://github.com/grafana/tempo/pull/4140) (@mapno)
* [BUGFIX] Fix counter samples being downsampled by backdate to the previous minute the initial sample when the series is new [#44236](https://github.com/grafana/tempo/pull/4236) (@javiermolinar)
* [BUGFIX] Fix counter samples being downsampled by backdate to the previous minute the initial sample when the series is new [#4236](https://github.com/grafana/tempo/pull/4236) (@javiermolinar)
* [BUGFIX] Fix traceql metrics time range handling at the cutoff between recent and backend data [#4257](https://github.com/grafana/tempo/issues/4257) (@mdisibio)
* [BUGFIX] Skip computing exemplars for instant queries. [#4204](https://github.com/grafana/tempo/pull/4204) (@javiermolinar)
* [BUGFIX] Gave context to orphaned spans related to various maintenance processes. [#4260](https://github.com/grafana/tempo/pull/4260) (@joe-elliott)
* [BUGFIX] Utilize S3Pass and S3User parameters in tempo-cli options, which were previously unused in the code. [#44236](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)
* [BUGFIX] Utilize S3Pass and S3User parameters in tempo-cli options, which were previously unused in the code. [#4259](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)

# v2.6.1

Expand Down
46 changes: 34 additions & 12 deletions pkg/traceql/engine_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,25 @@ func DefaultQueryRangeStep(start, end uint64) uint64 {

// IntervalCount is the number of intervals in the range with step.
func IntervalCount(start, end, step uint64) int {
start = alignStart(start, step)
end = alignEnd(end, step)

intervals := (end - start) / step
intervals++
return int(intervals)
}

// TimestampOf the given interval with the start and step.
func TimestampOf(interval, start, step uint64) uint64 {
start = alignStart(start, step)
return start + interval*step
}

// IntervalOf the given timestamp within the range and step.
func IntervalOf(ts, start, end, step uint64) int {
start = alignStart(start, step)
end = alignEnd(end, step) + step

if ts < start || ts > end || end == start || step == 0 {
// Invalid
return -1
Expand Down Expand Up @@ -89,10 +96,6 @@ func TrimToOverlap(start1, end1, step, start2, end2 uint64) (uint64, uint64, uin
if wasInstant {
// Alter step to maintain instant nature
step = end1 - start1
} else {
// Realign after trimming
start1 = (start1 / step) * step
end1 = (end1/step)*step + step
}

return start1, end1, step
Expand All @@ -110,9 +113,6 @@ func TrimToBefore(req *tempopb.QueryRangeRequest, before time.Time) {
if wasInstant {
// Maintain instant nature of the request
req.Step = req.End - req.Start
} else {
// Realign after trimming
AlignRequest(req)
}
}

Expand All @@ -128,9 +128,6 @@ func TrimToAfter(req *tempopb.QueryRangeRequest, before time.Time) {
if wasInstant {
// Maintain instant nature of the request
req.Step = req.End - req.Start
} else {
// Realign after trimming
AlignRequest(req)
}
}

Expand All @@ -148,8 +145,30 @@ func AlignRequest(req *tempopb.QueryRangeRequest) {
}

// It doesn't really matter but the request fields are expected to be in nanoseconds.
req.Start = req.Start / req.Step * req.Step
req.End = req.End / req.Step * req.Step
req.Start = alignStart(req.Start, req.Step)
req.End = alignEnd(req.End, req.Step)
}

// Start time is rounded down to next step
func alignStart(start, step uint64) uint64 {
if step == 0 {
return 0
}
return start - start%step
}

// End time is rounded up to next step
func alignEnd(end, step uint64) uint64 {
if step == 0 {
return 0
}

mod := end % step
if mod == 0 {
return end
}

return end + (step - mod)
}

type Label struct {
Expand Down Expand Up @@ -238,6 +257,9 @@ func (set SeriesSet) ToProtoDiff(req *tempopb.QueryRangeRequest, rangeForLabels
continue
}

start = alignStart(start, req.Step)
end = alignEnd(end, req.Step)

intervals := IntervalCount(start, end, req.Step)
samples := make([]tempopb.Sample, 0, intervals)
for i, value := range s.Values {
Expand Down
17 changes: 6 additions & 11 deletions pkg/traceql/engine_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func TestStepRangeToIntervals(t *testing.T) {
start: 0,
end: 10,
step: 3,
expected: 4, // 0, 3, 6, 9
expected: 5, // 0, 3, 6, 9, 12
},
}

Expand All @@ -60,9 +60,9 @@ func TestTimestampOf(t *testing.T) {
},
{
interval: 2,
start: 10,
start: 10, // aligned to 9
step: 3,
expected: 16,
expected: 15, // 9, 12, 15 <-- intervals
},
}

Expand Down Expand Up @@ -104,21 +104,16 @@ func TestTrimToOverlap(t *testing.T) {
expectedStep time.Duration
}{
{
// Inner range of 33 to 38
// gets rounded at 5m intervals to 30 to 40
// Fully overlapping
"2024-01-01 01:00:00", "2024-01-01 02:00:00", 5 * time.Minute,
"2024-01-01 01:33:00", "2024-01-01 01:38:00",
"2024-01-01 01:30:00", "2024-01-01 01:40:00", 5 * time.Minute,
"2024-01-01 01:33:00", "2024-01-01 01:38:00", 5 * time.Minute,
},
{
// Partially Overlapping
// Overlap between 1:01-2:01 and 1:31-2:31
// in 5m intervals is only 1:30-2:05
// Start is pushed back
// and end is pushed out
"2024-01-01 01:01:00", "2024-01-01 02:01:00", 5 * time.Minute,
"2024-01-01 01:31:00", "2024-01-01 02:31:00",
"2024-01-01 01:30:00", "2024-01-01 02:05:00", 5 * time.Minute,
"2024-01-01 01:31:00", "2024-01-01 02:01:00", 5 * time.Minute,
},
{
// Instant query
Expand Down

0 comments on commit 344743d

Please sign in to comment.