Skip to content

Commit

Permalink
Make LivenessCheck Timeout Configurable (#6227)
Browse files Browse the repository at this point in the history
  • Loading branch information
rajagopalanand authored Sep 25, 2024
1 parent a54da24 commit cccfd73
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
6 changes: 6 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -4480,6 +4480,12 @@ ring:
# Enable high availability
# CLI flag: -ruler.enable-ha-evaluation
[enable_ha_evaluation: <boolean> | default = false]
+ # Timeout duration for non-primary rulers during liveness checks. If the check
+ # times out, the non-primary ruler will evaluate the rule group. Applicable when
+ # ruler.enable-ha-evaluation is true.
+ # CLI flag: -ruler.liveness-check-timeout
+ [liveness_check_timeout: <duration> | default = 1s]
```

### `ruler_storage_config`
Expand Down
8 changes: 4 additions & 4 deletions pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,6 @@ const (
unknownHealthFilter string = "unknown"
okHealthFilter string = "ok"
errHealthFilter string = "err"

- livenessCheckTimeout = 100 * time.Millisecond
)

type DisabledRuleGroupErr struct {
Expand Down Expand Up @@ -161,7 +159,8 @@ type Config struct {
EnableQueryStats bool `yaml:"query_stats_enabled"`
DisableRuleGroupLabel bool `yaml:"disable_rule_group_label"`

- EnableHAEvaluation bool `yaml:"enable_ha_evaluation"`
+ EnableHAEvaluation bool `yaml:"enable_ha_evaluation"`
+ LivenessCheckTimeout time.Duration `yaml:"liveness_check_timeout"`
}

// Validate config and returns error on failure
Expand Down Expand Up @@ -238,6 +237,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.BoolVar(&cfg.DisableRuleGroupLabel, "ruler.disable-rule-group-label", false, "Disable the rule_group label on exported metrics")

f.BoolVar(&cfg.EnableHAEvaluation, "ruler.enable-ha-evaluation", false, "Enable high availability")
+ f.DurationVar(&cfg.LivenessCheckTimeout, "ruler.liveness-check-timeout", 1*time.Second, "Timeout duration for non-primary rulers during liveness checks. If the check times out, the non-primary ruler will evaluate the rule group. Applicable when ruler.enable-ha-evaluation is true.")

cfg.RingCheckPeriod = 5 * time.Second
}
Expand Down Expand Up @@ -590,7 +590,7 @@ func (r *Ruler) nonPrimaryInstanceOwnsRuleGroup(g *rulespb.RuleGroupDesc, replic
responseChan := make(chan *LivenessCheckResponse, len(jobs))

ctx := user.InjectOrgID(context.Background(), userID)
- ctx, cancel := context.WithTimeout(ctx, livenessCheckTimeout)
+ ctx, cancel := context.WithTimeout(ctx, r.cfg.LivenessCheckTimeout)
defer cancel()

err := concurrency.ForEach(ctx, jobs, len(jobs), func(ctx context.Context, job interface{}) error {
Expand Down

0 comments on commit cccfd73

Please sign in to comment.