From b00ec3ccc0072ece12c5a8885ebb911286524fe2 Mon Sep 17 00:00:00 2001 From: "Brian L. Troutwine" Date: Mon, 11 Sep 2023 17:17:12 +0200 Subject: [PATCH] Allow the user to configure the range of DogStatsD metrics This commit introduces a configuration option to allow the user to define the range of values that appear in DogStatsD metrics. This range applies to all metric kinds. The distribution is changed from Standard -- "numerically uniform" -- to actually Uniform. We do not allow users to configure the distribution. REF SMP-694 Signed-off-by: Brian L. Troutwine --- lading/src/block.rs | 6 ++++ lading_payload/src/dogstatsd.rs | 22 +++++++++++++++ lading_payload/src/dogstatsd/common.rs | 38 +++++++++++++++++++++----- lading_payload/src/dogstatsd/metric.rs | 14 +++++++--- 4 files changed, 69 insertions(+), 11 deletions(-) diff --git a/lading/src/block.rs b/lading/src/block.rs index a8618fd9d..e2b1b0a92 100644 --- a/lading/src/block.rs +++ b/lading/src/block.rs @@ -155,6 +155,8 @@ impl Cache { multivalue_count_maximum, kind_weights, metric_weights, + value_minimum, + value_maximum, }) => { let context_range = *contexts_minimum..*contexts_maximum; let tags_per_msg_range = *tags_per_msg_minimum..*tags_per_msg_maximum; @@ -173,6 +175,7 @@ impl Cache { *multivalue_pack_probability, *kind_weights, *metric_weights, + *value_minimum..*value_maximum, &mut rng, ); @@ -286,6 +289,8 @@ fn stream_inner( multivalue_count_maximum, kind_weights, metric_weights, + value_minimum, + value_maximum, }) => { let context_range = *contexts_minimum..*contexts_maximum; let tags_per_msg_range = *tags_per_msg_minimum..*tags_per_msg_maximum; @@ -304,6 +309,7 @@ fn stream_inner( *multivalue_pack_probability, *kind_weights, *metric_weights, + *value_minimum..*value_maximum, &mut rng, ); diff --git a/lading_payload/src/dogstatsd.rs b/lading_payload/src/dogstatsd.rs index 95da28a67..7839f7f59 100644 --- a/lading_payload/src/dogstatsd.rs +++ b/lading_payload/src/dogstatsd.rs @@ -27,6 +27,14 @@ fn 
contexts_maximum() -> u16 { 10_000 } +fn value_minimum() -> f64 { + f64::MIN +} + +fn value_maximum() -> f64 { + f64::MAX +} + // https://docs.datadoghq.com/developers/guide/what-best-practices-are-recommended-for-naming-metrics-and-tags/#rules-and-best-practices-for-naming-metrics fn name_length_minimum() -> u16 { 1 @@ -184,9 +192,18 @@ pub struct Config { /// payload. #[serde(default)] pub kind_weights: KindWeights, + /// Defines the relative probability of each kind of DogStatsD metric. #[serde(default)] pub metric_weights: MetricWeights, + + /// The minimum value to appear in metrics. + #[serde(default = "value_minimum")] + pub value_minimum: f64, + + /// The maximum value to appear in metrics. + #[serde(default = "value_maximum")] + pub value_maximum: f64, } fn choose_or_not_ref<'a, R, T>(mut rng: &mut R, pool: &'a [T]) -> Option<&'a T> @@ -272,6 +289,7 @@ impl MemberGenerator { multivalue_pack_probability: f32, kind_weights: KindWeights, metric_weights: MetricWeights, + num_value_range: Range<f64>, mut rng: &mut R, ) -> Self where @@ -343,6 +361,7 @@ impl MemberGenerator { small_strings, tagsets.clone(), pool.as_ref(), + num_value_range, &mut rng, ); @@ -425,6 +444,7 @@ impl DogStatsD { multivalue_pack_probability(), KindWeights::default(), MetricWeights::default(), + value_minimum()..value_maximum(), rng, ) } @@ -454,6 +474,7 @@ impl DogStatsD { multivalue_pack_probability: f32, kind_weights: KindWeights, metric_weights: MetricWeights, + num_value_range: Range<f64>, rng: &mut R, ) -> Self where @@ -469,6 +490,7 @@ impl DogStatsD { multivalue_pack_probability, kind_weights, metric_weights, + num_value_range, rng, ); diff --git a/lading_payload/src/dogstatsd/common.rs b/lading_payload/src/dogstatsd/common.rs index 479040649..aad7002fc 100644 --- a/lading_payload/src/dogstatsd/common.rs +++ b/lading_payload/src/dogstatsd/common.rs @@ -1,6 +1,12 @@ -use std::fmt; +use std::{fmt, ops::Range}; -use rand::{distributions::Standard, prelude::Distribution, Rng}; +use rand::{ +
distributions::{Standard, Uniform}, + prelude::Distribution, + Rng, +}; + +use crate::Generator; pub(crate) mod tags; @@ -10,14 +16,32 @@ pub(crate) enum NumValue { Int(i64), } -impl Distribution<NumValue> for Standard { - fn sample<R>(&self, rng: &mut R) -> NumValue +#[derive(Clone, Debug)] +pub(crate) struct NumValueGenerator { + float_distr: Uniform<f64>, + int_distr: Uniform<i64>, +} + +impl NumValueGenerator { + #[allow(clippy::cast_possible_truncation)] + pub(crate) fn new(range: Range<f64>) -> Self { + Self { + float_distr: Uniform::new(range.start, range.end), + int_distr: Uniform::new(range.start as i64, range.end as i64), + } + } +} + +impl<'a> Generator<'a> for NumValueGenerator { + type Output = NumValue; + + fn generate<R>(&'a self, rng: &mut R) -> Self::Output where - R: Rng + ?Sized, + R: rand::Rng + ?Sized, { match rng.gen_range(0..=1) { - 0 => NumValue::Float(rng.gen()), - 1 => NumValue::Int(rng.gen()), + 0 => NumValue::Float(self.float_distr.sample(rng)), + 1 => NumValue::Int(self.int_distr.sample(rng)), _ => unreachable!(), } } diff --git a/lading_payload/src/dogstatsd/metric.rs b/lading_payload/src/dogstatsd/metric.rs index 0b2575467..5652c5e96 100644 --- a/lading_payload/src/dogstatsd/metric.rs +++ b/lading_payload/src/dogstatsd/metric.rs @@ -1,7 +1,7 @@ use std::{fmt, ops::Range}; use rand::{ - distributions::{OpenClosed01, Standard, WeightedIndex}, + distributions::{OpenClosed01, WeightedIndex}, prelude::{Distribution, SliceRandom}, Rng, }; @@ -9,7 +9,10 @@ use rand::{ use crate::{common::strings, dogstatsd::metric::template::Template, Generator}; use tracing::debug; -use super::{choose_or_not_ref, common}; +use super::{ + choose_or_not_ref, + common::{self, NumValueGenerator}, +}; mod template; @@ -19,6 +22,7 @@ pub(crate) struct MetricGenerator { pub(crate) templates: Vec, pub(crate) multivalue_count_range: Range, pub(crate) multivalue_pack_probability: f32, + pub(crate) num_value_generator: NumValueGenerator, } impl MetricGenerator { @@ -32,6 +36,7 @@ impl
MetricGenerator { container_ids: Vec, tagsets: common::tags::Tagsets, str_pool: &strings::Pool, + num_value_range: Range<f64>, mut rng: &mut R, ) -> Self where @@ -65,6 +70,7 @@ impl MetricGenerator { templates, multivalue_count_range, multivalue_pack_probability, + num_value_generator: NumValueGenerator::new(num_value_range), } } } @@ -88,14 +94,14 @@ impl<'a> Generator<'a> for MetricGenerator { let sample_rate = rng.gen(); let mut values = Vec::with_capacity(self.multivalue_count_range.end as usize); - let value: common::NumValue = Standard.sample(&mut rng); + let value: common::NumValue = self.num_value_generator.generate(&mut rng); values.push(value); let prob: f32 = OpenClosed01.sample(&mut rng); if prob < self.multivalue_pack_probability { let num_desired_values = rng.gen_range(self.multivalue_count_range.clone()); for _ in 1..num_desired_values { - values.push(Standard.sample(&mut rng)); + values.push(self.num_value_generator.generate(&mut rng)); } }