Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TraceQL metrics time range fixes #4325

Merged
merged 2 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,19 @@
* [ENHANCEMENT] chore: remove gofakeit dependency [#4274](https://github.com/grafana/tempo/pull/4274) (@javiermolinar)
* [ENHANCEMENT] Add a max flush attempts and metric to the metrics generator [#4254](https://github.com/grafana/tempo/pull/4254) (@joe-elliott)
* [ENHANCEMENT] Collection of query-frontend changes to reduce allocs. [#4242](https://github.com/grafana/tempo/pull/4242) (@joe-elliott)
* [ENHANCEMENT] Added `insecure-skip-verify` option in tempo-cli to skip SSL certificate validation when connecting to the S3 backend. [#44236](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)
* [ENHANCEMENT] Added `insecure-skip-verify` option in tempo-cli to skip SSL certificate validation when connecting to the S3 backend. [#4259](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)
* [ENHANCEMENT] Chore: delete spanlogger. [4312](https://github.com/grafana/tempo/pull/4312) (@javiermolinar)
* [ENHANCEMENT] Add `invalid_utf8` to reasons spanmetrics will discard spans. [#4293](https://github.com/grafana/tempo/pull/4293) (@zalegrala)
* [ENHANCEMENT] Reduce frontend and querier allocations by dropping HTTP headers early in the pipeline. [#4298](https://github.com/grafana/tempo/pull/4298) (@joe-elliott)
* [BUGFIX] Replace hedged requests roundtrips total with a counter. [#4063](https://github.com/grafana/tempo/pull/4063) [#4078](https://github.com/grafana/tempo/pull/4078) (@galalen)
* [BUGFIX] Metrics generators: Correctly drop from the ring before stopping ingestion to reduce drops during a rollout. [#4101](https://github.com/grafana/tempo/pull/4101) (@joe-elliott)
* [BUGFIX] Correctly handle 400 Bad Request and 404 Not Found in gRPC streaming [#4144](https://github.com/grafana/tempo/pull/4144) (@mapno)
* [BUGFIX] Pushes a 0 to classic histogram's counter when the series is new to allow Prometheus to start from a non-null value. [#4140](https://github.com/grafana/tempo/pull/4140) (@mapno)
* [BUGFIX] Fix counter samples being downsampled by backdate to the previous minute the initial sample when the series is new [#44236](https://github.com/grafana/tempo/pull/4236) (@javiermolinar)
* [BUGFIX] Fix counter samples being downsampled by backdate to the previous minute the initial sample when the series is new [#4236](https://github.com/grafana/tempo/pull/4236) (@javiermolinar)
* [BUGFIX] Fix traceql metrics time range handling at the cutoff between recent and backend data [#4257](https://github.com/grafana/tempo/issues/4257) (@mdisibio)
* [BUGFIX] Skip computing exemplars for instant queries. [#4204](https://github.com/grafana/tempo/pull/4204) (@javiermolinar)
* [BUGFIX] Gave context to orphaned spans related to various maintenance processes. [#4260](https://github.com/grafana/tempo/pull/4260) (@joe-elliott)
* [BUGFIX] Utilize S3Pass and S3User parameters in tempo-cli options, which were previously unused in the code. [#44236](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)
* [BUGFIX] Utilize S3Pass and S3User parameters in tempo-cli options, which were previously unused in the code. [#4259](https://github.com/grafana/tempo/pull/4259) (@faridtmammadov)

# v2.6.1

Expand Down
46 changes: 34 additions & 12 deletions pkg/traceql/engine_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,25 @@ func DefaultQueryRangeStep(start, end uint64) uint64 {

// IntervalCount is the number of intervals in the range with step.
func IntervalCount(start, end, step uint64) int {
start = alignStart(start, step)
end = alignEnd(end, step)

intervals := (end - start) / step
intervals++
return int(intervals)
}

// TimestampOf the given interval with the start and step.
func TimestampOf(interval, start, step uint64) uint64 {
start = alignStart(start, step)
return start + interval*step
}

// IntervalOf the given timestamp within the range and step.
func IntervalOf(ts, start, end, step uint64) int {
start = alignStart(start, step)
end = alignEnd(end, step) + step

if ts < start || ts > end || end == start || step == 0 {
// Invalid
return -1
Expand Down Expand Up @@ -89,10 +96,6 @@ func TrimToOverlap(start1, end1, step, start2, end2 uint64) (uint64, uint64, uin
if wasInstant {
// Alter step to maintain instant nature
step = end1 - start1
} else {
// Realign after trimming
start1 = (start1 / step) * step
end1 = (end1/step)*step + step
}

return start1, end1, step
Expand All @@ -110,9 +113,6 @@ func TrimToBefore(req *tempopb.QueryRangeRequest, before time.Time) {
if wasInstant {
// Maintain instant nature of the request
req.Step = req.End - req.Start
} else {
// Realign after trimming
AlignRequest(req)
}
}

Expand All @@ -128,9 +128,6 @@ func TrimToAfter(req *tempopb.QueryRangeRequest, before time.Time) {
if wasInstant {
// Maintain instant nature of the request
req.Step = req.End - req.Start
} else {
// Realign after trimming
AlignRequest(req)
}
}

Expand All @@ -148,8 +145,30 @@ func AlignRequest(req *tempopb.QueryRangeRequest) {
}

// It doesn't really matter but the request fields are expected to be in nanoseconds.
req.Start = req.Start / req.Step * req.Step
req.End = req.End / req.Step * req.Step
req.Start = alignStart(req.Start, req.Step)
req.End = alignEnd(req.End, req.Step)
}

// Start time is rounded down to next step
func alignStart(start, step uint64) uint64 {
if step == 0 {
return 0
}
return start - start%step
}

// End time is rounded up to next step
func alignEnd(end, step uint64) uint64 {
if step == 0 {
return 0
}

mod := end % step
if mod == 0 {
return end
}

return end + (step - mod)
}

type Label struct {
Expand Down Expand Up @@ -238,6 +257,9 @@ func (set SeriesSet) ToProtoDiff(req *tempopb.QueryRangeRequest, rangeForLabels
continue
}

start = alignStart(start, req.Step)
end = alignEnd(end, req.Step)

intervals := IntervalCount(start, end, req.Step)
samples := make([]tempopb.Sample, 0, intervals)
for i, value := range s.Values {
Expand Down
17 changes: 6 additions & 11 deletions pkg/traceql/engine_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func TestStepRangeToIntervals(t *testing.T) {
start: 0,
end: 10,
step: 3,
expected: 4, // 0, 3, 6, 9
expected: 5, // 0, 3, 6, 9, 12
},
}

Expand All @@ -60,9 +60,9 @@ func TestTimestampOf(t *testing.T) {
},
{
interval: 2,
start: 10,
start: 10, // aligned to 9
step: 3,
expected: 16,
expected: 15, // 9, 12, 15 <-- intervals
},
}

Expand Down Expand Up @@ -104,21 +104,16 @@ func TestTrimToOverlap(t *testing.T) {
expectedStep time.Duration
}{
{
// Inner range of 33 to 38
// gets rounded at 5m intervals to 30 to 40
// Fully overlapping
"2024-01-01 01:00:00", "2024-01-01 02:00:00", 5 * time.Minute,
"2024-01-01 01:33:00", "2024-01-01 01:38:00",
"2024-01-01 01:30:00", "2024-01-01 01:40:00", 5 * time.Minute,
"2024-01-01 01:33:00", "2024-01-01 01:38:00", 5 * time.Minute,
},
{
// Partially Overlapping
// Overlap between 1:01-2:01 and 1:31-2:31
// in 5m intervals is only 1:30-2:05
// Start is pushed back
// and end is pushed out
"2024-01-01 01:01:00", "2024-01-01 02:01:00", 5 * time.Minute,
"2024-01-01 01:31:00", "2024-01-01 02:31:00",
"2024-01-01 01:30:00", "2024-01-01 02:05:00", 5 * time.Minute,
"2024-01-01 01:31:00", "2024-01-01 02:01:00", 5 * time.Minute,
},
{
// Instant query
Expand Down