From d6bab51702b1768530040a59ef2500a09b489d3b Mon Sep 17 00:00:00 2001 From: "Brian L. Troutwine" Date: Thu, 12 Oct 2023 16:47:31 -0700 Subject: [PATCH] Adjust dogstatsd configuration to allow for constant values We have the need to specify constant values for some of our range constructions in dogstatsd payloads. This commit allows that behavior, although users must now be explicit about the constant or inclusive setup they intend. This will be very helpful in experimenting with the Agent, we believe. REF https://github.com/DataDog/datadog-agent/pull/19993 REF SMPTNG-24 Signed-off-by: Brian L. Troutwine --- Cargo.lock | 2 +- lading/Cargo.toml | 2 +- lading/src/block.rs | 74 ++--- .../proptest-regressions/common/strings.txt | 1 + .../proptest-regressions/dogstatsd.txt | 7 + lading_payload/src/common/strings.rs | 5 +- lading_payload/src/dogstatsd.rs | 294 ++++++++---------- lading_payload/src/dogstatsd/common.rs | 6 +- lading_payload/src/dogstatsd/common/tags.rs | 59 ++-- lading_payload/src/dogstatsd/event.rs | 10 +- lading_payload/src/dogstatsd/metric.rs | 25 +- 11 files changed, 229 insertions(+), 256 deletions(-) create mode 100644 lading_payload/proptest-regressions/dogstatsd.txt diff --git a/Cargo.lock b/Cargo.lock index 7fe8d3528..42081f7db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1010,7 +1010,7 @@ dependencies = [ [[package]] name = "lading" -version = "0.18.1" +version = "0.19.0-rc1" dependencies = [ "async-pidfd", "byte-unit", diff --git a/lading/Cargo.toml b/lading/Cargo.toml index 199a6a78b..5e020ee4a 100644 --- a/lading/Cargo.toml +++ b/lading/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lading" -version = "0.18.1" +version = "0.19.0-rc1" authors = ["Brian L. 
Troutwine ", "George Hahn { - let context_range = *contexts_minimum..*contexts_maximum; - let tags_per_msg_range = *tags_per_msg_minimum..*tags_per_msg_maximum; - let name_length_range = *name_length_minimum..*name_length_maximum; - let tag_key_length_range = *tag_key_length_minimum..*tag_key_length_maximum; - let tag_value_length_range = *tag_value_length_minimum..*tag_value_length_maximum; - let multivalue_count_range = *multivalue_count_minimum..*multivalue_count_maximum; - let serializer = payload::DogStatsD::new( - context_range, - name_length_range, - tag_key_length_range, - tag_value_length_range, - tags_per_msg_range, - multivalue_count_range, + *contexts, + *name_length, + *tag_key_length, + *tag_value_length, + *tags_per_msg, + *multivalue_count, *multivalue_pack_probability, *kind_weights, *metric_weights, @@ -272,38 +259,25 @@ fn stream_inner( stream_block_inner(&mut rng, total_bytes, &pyld, block_chunks, &snd) } payload::Config::DogStatsD(payload::dogstatsd::Config { - contexts_minimum, - contexts_maximum, - name_length_minimum, - name_length_maximum, - tag_key_length_minimum, - tag_key_length_maximum, - tag_value_length_minimum, - tag_value_length_maximum, - tags_per_msg_minimum, - tags_per_msg_maximum, + contexts, + name_length, + tag_key_length, + tag_value_length, + tags_per_msg, // TODO -- Validate user input for multivalue_pack_probability. 
multivalue_pack_probability, - multivalue_count_minimum, - multivalue_count_maximum, + multivalue_count, kind_weights, metric_weights, value, }) => { - let context_range = *contexts_minimum..*contexts_maximum; - let tags_per_msg_range = *tags_per_msg_minimum..*tags_per_msg_maximum; - let name_length_range = *name_length_minimum..*name_length_maximum; - let tag_key_length_range = *tag_key_length_minimum..*tag_key_length_maximum; - let tag_value_length_range = *tag_value_length_minimum..*tag_value_length_maximum; - let multivalue_count_range = *multivalue_count_minimum..*multivalue_count_maximum; - let pyld = payload::DogStatsD::new( - context_range, - name_length_range, - tag_key_length_range, - tag_value_length_range, - tags_per_msg_range, - multivalue_count_range, + *contexts, + *name_length, + *tag_key_length, + *tag_value_length, + *tags_per_msg, + *multivalue_count, *multivalue_pack_probability, *kind_weights, *metric_weights, diff --git a/lading_payload/proptest-regressions/common/strings.txt b/lading_payload/proptest-regressions/common/strings.txt index 3f1775844..9ece70b06 100644 --- a/lading_payload/proptest-regressions/common/strings.txt +++ b/lading_payload/proptest-regressions/common/strings.txt @@ -7,3 +7,4 @@ cc af61ca38851fafeed4d96b45d1d1aca38e2ab8a1489b51ccae6e0e178640e569 # shrinks to seed = 0, max_bytes = 2, of_size_bytes = 0, alphabet = "a🈐aAaA0𐦀A𐠊A\u{e0100}" cc e4740adac235cc14baefdb17ab4e7b825a9100e1e140031f72c0eb66783a6bdc # shrinks to seed = 49880598398515969, max_bytes = 3977, of_size_bytes = 0, alphabet = "ힰA\u{11c92}® " cc 7d00cd4ac860c10fd2a903761b1540638fb95f92bfd352cbf952ec53760bedb4 # shrinks to seed = 170628173656970550, max_bytes = 7313, of_size_bytes = 0, alphabet = " 𒒀" +cc c3bbc5eb64e117c599cfe76df189ae8ec8238c47e76a942970505e5da6dcee1b # shrinks to seed = 0, max_bytes = 43894, of_size_bytes = 43894 diff --git a/lading_payload/proptest-regressions/dogstatsd.txt b/lading_payload/proptest-regressions/dogstatsd.txt new file mode 
100644 index 000000000..67c691387 --- /dev/null +++ b/lading_payload/proptest-regressions/dogstatsd.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 97158e48115dce6b53c08d3f3744ebd92cb711273b19aee225daa774f71f6e23 # shrinks to seed = 0, max_bytes = 0 diff --git a/lading_payload/src/common/strings.rs b/lading_payload/src/common/strings.rs index 603800ea4..ca79fc693 100644 --- a/lading_payload/src/common/strings.rs +++ b/lading_payload/src/common/strings.rs @@ -128,7 +128,8 @@ mod test { } } - // Ensure that of_size only returns None if the request is larger than the interior size. + // Ensure that of_size only returns None if the request is greater than or + // equal to the interior size. proptest! { #[test] fn return_none_condition(seed: u64, max_bytes: u16, of_size_bytes: u16) { @@ -138,7 +139,7 @@ mod test { let pool = Pool::with_size_and_alphabet(&mut rng, max_bytes, ALPHANUM); if pool.of_size(&mut rng, of_size_bytes).is_none() { - assert!(of_size_bytes > max_bytes); + assert!(of_size_bytes >= max_bytes); } } } diff --git a/lading_payload/src/dogstatsd.rs b/lading_payload/src/dogstatsd.rs index a8711fa59..43119e5cf 100644 --- a/lading_payload/src/dogstatsd.rs +++ b/lading_payload/src/dogstatsd.rs @@ -1,8 +1,13 @@ //! `DogStatsD` payload. 
-use std::{fmt, io::Write, ops::Range, rc::Rc}; +use std::{cmp, fmt, io::Write, ops::Range, rc::Rc}; -use rand::{distributions::WeightedIndex, prelude::Distribution, seq::SliceRandom, Rng}; +use rand::{ + distributions::{uniform::SampleUniform, WeightedIndex}, + prelude::Distribution, + seq::SliceRandom, + Rng, +}; use serde::Deserialize; use crate::{common::strings, Error, Serialize}; @@ -19,18 +24,17 @@ mod event; mod metric; mod service_check; -fn contexts_minimum() -> u32 { - 5000 -} - -fn contexts_maximum() -> u32 { - 10_000 +fn contexts() -> ConfRange { + ConfRange::Inclusive { + min: 5_000, + max: 10_000, + } } fn value_config() -> ValueConf { ValueConf { float_probability: 0.5, // 50% - range: ValueRange::Inclusive { + range: ConfRange::Inclusive { min: i64::MIN, max: i64::MAX, }, @@ -38,50 +42,30 @@ fn value_config() -> ValueConf { } // https://docs.datadoghq.com/developers/guide/what-best-practices-are-recommended-for-naming-metrics-and-tags/#rules-and-best-practices-for-naming-metrics -fn name_length_minimum() -> u16 { - 1 -} - -fn name_length_maximum() -> u16 { - 200 +fn name_length() -> ConfRange { + ConfRange::Inclusive { min: 1, max: 200 } } -fn tag_key_length_minimum() -> u16 { - 1 +fn tag_key_length() -> ConfRange { + ConfRange::Inclusive { min: 1, max: 100 } } -fn tag_key_length_maximum() -> u16 { - 100 +fn tag_value_length() -> ConfRange { + ConfRange::Inclusive { min: 1, max: 100 } } -fn tag_value_length_minimum() -> u16 { - 1 +fn tags_per_msg() -> ConfRange { + ConfRange::Inclusive { min: 2, max: 50 } } -fn tag_value_length_maximum() -> u16 { - 100 -} - -fn tags_per_msg_minimum() -> u16 { - 2 -} - -fn tags_per_msg_maximum() -> u16 { - 50 +fn multivalue_count() -> ConfRange { + ConfRange::Inclusive { min: 2, max: 32 } } fn multivalue_pack_probability() -> f32 { 0.08 } -fn multivalue_count_minimum() -> u16 { - 2 -} - -fn multivalue_count_maximum() -> u16 { - 32 -} - /// Weights for `DogStatsD` kinds: metrics, events, service checks /// /// 
Defines the relative probability of each kind of `DogStatsD` datagram. @@ -133,86 +117,87 @@ impl Default for MetricWeights { pub struct ValueConf { /// Odds out of 256 that the value will be a float and not an integer. float_probability: f32, - range: ValueRange, + range: ConfRange, } -/// Configuration for the values range of a metric. +/// Range expression for configuration #[derive(Debug, Deserialize, Clone, PartialEq, Copy)] -#[serde(rename_all = "snake_case")] -pub enum ValueRange { - /// Metric values are always constant. - Constant(i64), - /// Metric values are uniformly distributed between min and max, inclusive - /// of max. +pub enum ConfRange +where + T: PartialEq + cmp::PartialOrd + Clone + Copy, +{ + /// A constant T + Constant(T), + /// In which a T is chosen between `min` and `max`, inclusive of `max`. Inclusive { /// The minimum of the range. - min: i64, + min: T, /// The maximum of the range. - max: i64, + max: T, }, } +impl ConfRange +where + T: PartialEq + cmp::PartialOrd + Clone + Copy, +{ + fn end(&self) -> T { + match self { + ConfRange::Constant(c) => *c, + ConfRange::Inclusive { max, .. } => *max, + } + } +} + +impl ConfRange +where + T: PartialEq + cmp::PartialOrd + Clone + Copy + SampleUniform, +{ + fn sample(&self, rng: &mut R) -> T + where + R: rand::Rng + ?Sized, + { + match self { + ConfRange::Constant(c) => *c, + ConfRange::Inclusive { min, max } => rng.gen_range(*min..=*max), + } + } +} + /// Configure the `DogStatsD` payload. 
#[derive(Debug, Deserialize, Clone, PartialEq, Copy)] pub struct Config { - /// Minimum number of unique metric contexts to generate - /// A context is a set of unique metric name + tags - #[serde(default = "contexts_minimum")] - pub contexts_minimum: u32, - - /// Maximum number of unique metric contexts to generate - /// A context is a set of unique metric name + tags - #[serde(default = "contexts_maximum")] - pub contexts_maximum: u32, - - /// Minimum length for a dogstatsd message name - #[serde(default = "name_length_minimum")] - pub name_length_minimum: u16, - - /// Maximum length for a dogstatsd message name - #[serde(default = "name_length_maximum")] - pub name_length_maximum: u16, - - /// Minimum length for the 'key' part of a dogstatsd tag - #[serde(default = "tag_key_length_minimum")] - pub tag_key_length_minimum: u16, - - /// Maximum length for the 'key' part of a dogstatsd tag - #[serde(default = "tag_key_length_maximum")] - pub tag_key_length_maximum: u16, - - /// Minimum length for the 'value' part of a dogstatsd tag - #[serde(default = "tag_value_length_minimum")] - pub tag_value_length_minimum: u16, - - /// Maximum length for the 'value' part of a dogstatsd tag - #[serde(default = "tag_value_length_maximum")] - pub tag_value_length_maximum: u16, - - /// Maximum number of tags per individual dogstatsd msg - /// a tag is a key-value pair separated by a : - #[serde(default = "tags_per_msg_maximum")] - pub tags_per_msg_maximum: u16, - - /// Minimum number of tags per individual dogstatsd msg - /// a tag is a key-value pair separated by a : - #[serde(default = "tags_per_msg_minimum")] - pub tags_per_msg_minimum: u16, + /// The unique metric contexts to generate A context is a set of unique + /// metric name + tags + #[serde(default = "contexts")] + pub contexts: ConfRange, + + /// Length for a dogstatsd message name + #[serde(default = "name_length")] + pub name_length: ConfRange, + + /// Length for the 'key' part of a dogstatsd tag + #[serde(default = 
"tag_key_length")] + pub tag_key_length: ConfRange, + + /// Length for the 'value' part of a dogstatsd tag + #[serde(default = "tag_value_length")] + pub tag_value_length: ConfRange, + + /// Number of tags per individual dogstatsd msg a tag is a key-value pair + /// separated by a : + #[serde(default = "tags_per_msg")] + pub tags_per_msg: ConfRange, /// Probability between 0 and 1 that a given dogstatsd msg /// contains multiple values #[serde(default = "multivalue_pack_probability")] pub multivalue_pack_probability: f32, - /// The minimum count of values that will be generated if - /// multi-value is chosen to be generated - #[serde(default = "multivalue_count_minimum")] - pub multivalue_count_minimum: u16, - - /// The maximum count of values that will be generated if - /// multi-value is chosen to be generated - #[serde(default = "multivalue_count_maximum")] - pub multivalue_count_maximum: u16, + /// The count of values that will be generated if multi-value is chosen to + /// be generated + #[serde(default = "multivalue_count")] + pub multivalue_count: ConfRange, /// Defines the relative probability of each kind of DogStatsD kinds of /// payload. 
@@ -265,7 +250,10 @@ where R: Rng + ?Sized, { let total = rng.gen_range(min_max); - let length_range = 1..max_length; + let length_range = ConfRange::Inclusive { + min: 1, + max: max_length, + }; random_strings_with_length_range(pool, total, length_range, rng) } @@ -276,7 +264,7 @@ where fn random_strings_with_length_range( pool: &strings::Pool, total: usize, - length_range: Range, + length_range: ConfRange, mut rng: &mut R, ) -> Vec where @@ -284,9 +272,8 @@ where { let mut buf = Vec::with_capacity(total); for _ in 0..total { - buf.push(String::from( - pool.of_size_range(&mut rng, length_range.clone()).unwrap(), - )); + let sz = length_range.sample(&mut rng) as usize; + buf.push(String::from(pool.of_size(&mut rng, sz).unwrap())); } buf } @@ -302,12 +289,12 @@ struct MemberGenerator { impl MemberGenerator { #[allow(clippy::too_many_arguments)] fn new( - context_range: Range, - name_length_range: Range, - tag_key_length_range: Range, - tag_value_length_range: Range, - tags_per_msg_range: Range, - multivalue_count_range: Range, + contexts: ConfRange, + name_length: ConfRange, + tag_key_length: ConfRange, + tag_value_length: ConfRange, + tags_per_msg: ConfRange, + multivalue_count: ConfRange, multivalue_pack_probability: f32, kind_weights: KindWeights, metric_weights: MetricWeights, @@ -319,26 +306,20 @@ impl MemberGenerator { { let pool = Rc::new(strings::Pool::with_size(&mut rng, 8_000_000)); - let context_range: Range = - context_range.start.try_into().unwrap()..context_range.end.try_into().unwrap(); - - let tags_per_msg_range: Range = tags_per_msg_range.start.try_into().unwrap() - ..tags_per_msg_range.end.try_into().unwrap(); - - let num_contexts = rng.gen_range(context_range); + let num_contexts = contexts.sample(rng); let tags_generator = tags::Generator { - num_tagsets: num_contexts, - tags_per_msg_range, - tag_key_length_range, - tag_value_length_range, + num_tagsets: num_contexts as usize, + tags_per_msg, + tag_key_length, + tag_value_length, str_pool: 
Rc::clone(&pool), }; let service_event_titles = random_strings_with_length_range( pool.as_ref(), - num_contexts, - name_length_range.clone(), + num_contexts as usize, + name_length, &mut rng, ); let tagsets = tags_generator.generate(&mut rng); @@ -361,7 +342,7 @@ impl MemberGenerator { let event_generator = EventGenerator { str_pool: Rc::clone(&pool), - title_length_range: name_length_range.clone(), + title_length: name_length, texts_or_messages_length_range: 1..1024, small_strings_length_range: 1..8, tagsets: tagsets.clone(), @@ -375,9 +356,9 @@ impl MemberGenerator { }; let metric_generator = MetricGenerator::new( - num_contexts, - name_length_range.clone(), - multivalue_count_range.clone(), + num_contexts as usize, + name_length, + multivalue_count, multivalue_pack_probability, &WeightedIndex::new(metric_choices).unwrap(), small_strings, @@ -457,12 +438,12 @@ impl DogStatsD { R: rand::Rng + ?Sized, { Self::new( - contexts_minimum()..contexts_maximum(), - name_length_minimum()..name_length_maximum(), - tag_key_length_minimum()..tag_key_length_maximum(), - tag_value_length_minimum()..tag_value_length_maximum(), - tags_per_msg_minimum()..tags_per_msg_maximum(), - multivalue_count_minimum()..multivalue_count_maximum(), + contexts(), + name_length(), + tag_key_length(), + tag_value_length(), + tags_per_msg(), + multivalue_count(), multivalue_pack_probability(), KindWeights::default(), MetricWeights::default(), @@ -487,12 +468,12 @@ impl DogStatsD { /// Create a new instance of `DogStatsD`. 
#[allow(clippy::too_many_arguments)] pub fn new( - context_range: Range, - name_length_range: Range, - tag_key_length_range: Range, - tag_value_length_range: Range, - tags_per_msg_range: Range, - multivalue_count_range: Range, + contexts: ConfRange, + name_length: ConfRange, + tag_key_length: ConfRange, + tag_value_length: ConfRange, + tags_per_msg: ConfRange, + multivalue_count: ConfRange, multivalue_pack_probability: f32, kind_weights: KindWeights, metric_weights: MetricWeights, @@ -503,12 +484,12 @@ impl DogStatsD { R: rand::Rng + ?Sized, { let member_generator = MemberGenerator::new( - context_range, - name_length_range, - tag_key_length_range, - tag_value_length_range, - tags_per_msg_range, - multivalue_count_range, + contexts, + name_length, + tag_key_length, + tag_value_length, + tags_per_msg, + multivalue_count, multivalue_pack_probability, kind_weights, metric_weights, @@ -550,11 +531,8 @@ mod test { use crate::{ dogstatsd::{ - contexts_maximum, contexts_minimum, multivalue_count_maximum, multivalue_count_minimum, - multivalue_pack_probability, name_length_maximum, name_length_minimum, - tag_key_length_maximum, tag_key_length_minimum, tag_value_length_maximum, - tag_value_length_minimum, tags_per_msg_maximum, tags_per_msg_minimum, value_config, - KindWeights, MetricWeights, + contexts, multivalue_count, multivalue_pack_probability, name_length, tag_key_length, + tag_value_length, tags_per_msg, value_config, KindWeights, MetricWeights, }, DogStatsD, Serialize, }; @@ -566,20 +544,14 @@ mod test { fn payload_not_exceed_max_bytes(seed: u64, max_bytes: u16) { let max_bytes = max_bytes as usize; let mut rng = SmallRng::seed_from_u64(seed); - let context_range = contexts_minimum()..contexts_maximum(); - let name_length_range = name_length_minimum()..name_length_maximum(); - let tag_key_length_range = tag_key_length_minimum()..tag_key_length_maximum(); - let tag_value_length_range = tag_value_length_minimum()..tag_value_length_maximum(); - let tags_per_msg_range = 
tags_per_msg_minimum()..tags_per_msg_maximum(); - let multivalue_count_range = multivalue_count_minimum()..multivalue_count_maximum(); let multivalue_pack_probability = multivalue_pack_probability(); let value_conf = value_config(); let kind_weights = KindWeights::default(); let metric_weights = MetricWeights::default(); - let dogstatsd = DogStatsD::new(context_range, name_length_range, tag_key_length_range, - tag_value_length_range, tags_per_msg_range, - multivalue_count_range, multivalue_pack_probability, kind_weights, + let dogstatsd = DogStatsD::new(contexts(), name_length(), tag_key_length(), + tag_value_length(), tags_per_msg(), + multivalue_count(), multivalue_pack_probability, kind_weights, metric_weights, value_conf, &mut rng); let mut bytes = Vec::with_capacity(max_bytes); diff --git a/lading_payload/src/dogstatsd/common.rs b/lading_payload/src/dogstatsd/common.rs index 43b6c10a2..58d4afc45 100644 --- a/lading_payload/src/dogstatsd/common.rs +++ b/lading_payload/src/dogstatsd/common.rs @@ -8,7 +8,7 @@ use rand::{ use crate::Generator; -use super::{ValueConf, ValueRange}; +use super::{ConfRange, ValueConf}; pub(crate) mod tags; @@ -36,12 +36,12 @@ impl NumValueGenerator { #[allow(clippy::cast_possible_truncation)] pub(crate) fn new(conf: ValueConf) -> Self { match conf.range { - ValueRange::Constant(c) => Self::Constant { + ConfRange::Constant(c) => Self::Constant { float_probability: conf.float_probability, int: c, float: c as f64, }, - ValueRange::Inclusive { min, max } => Self::Uniform { + ConfRange::Inclusive { min, max } => Self::Uniform { float_probability: conf.float_probability, int_distr: Uniform::new_inclusive(min, max), float_distr: Uniform::new_inclusive(min as f64, max as f64), diff --git a/lading_payload/src/dogstatsd/common/tags.rs b/lading_payload/src/dogstatsd/common/tags.rs index beb0e610d..d0f6f8e76 100644 --- a/lading_payload/src/dogstatsd/common/tags.rs +++ b/lading_payload/src/dogstatsd/common/tags.rs @@ -1,6 +1,6 @@ -use 
std::{ops::Range, rc::Rc}; +use std::rc::Rc; -use crate::common::strings; +use crate::{common::strings, dogstatsd::ConfRange}; // This represents a list of tags that will be present on a single // dogstatsd message. @@ -10,9 +10,9 @@ pub(crate) type Tagsets = Vec; pub(crate) struct Generator { pub(crate) num_tagsets: usize, - pub(crate) tags_per_msg_range: Range, - pub(crate) tag_key_length_range: Range, - pub(crate) tag_value_length_range: Range, + pub(crate) tags_per_msg: ConfRange, + pub(crate) tag_key_length: ConfRange, + pub(crate) tag_value_length: ConfRange, pub(crate) str_pool: Rc, } @@ -24,25 +24,17 @@ impl<'a> crate::Generator<'a> for Generator { where R: rand::Rng + ?Sized, { - let tags_per_msg_range = self.tags_per_msg_range.clone(); - let mut tagsets: Vec = Vec::with_capacity(self.num_tagsets); for _ in 0..self.num_tagsets { - let tags_per_msg_range = tags_per_msg_range.clone(); - - let num_tags_for_this_msg = rng.gen_range(tags_per_msg_range); + let num_tags_for_this_msg = self.tags_per_msg.sample(rng) as usize; let mut tagset = Vec::with_capacity(num_tags_for_this_msg); for _ in 0..num_tags_for_this_msg { let mut tag = String::new(); tag.reserve(512); // a guess, big-ish but not too big - let key = self - .str_pool - .of_size_range(&mut rng, self.tag_key_length_range.clone()) - .unwrap(); - let value = self - .str_pool - .of_size_range(&mut rng, self.tag_value_length_range.clone()) - .unwrap(); + let key_sz = self.tag_key_length.sample(&mut rng) as usize; + let key = self.str_pool.of_size(&mut rng, key_sz).unwrap(); + let value_sz = self.tag_value_length.sample(&mut rng) as usize; + let value = self.str_pool.of_size(&mut rng, value_sz).unwrap(); tag.push_str(key); tag.push(':'); tag.push_str(value); @@ -53,3 +45,34 @@ impl<'a> crate::Generator<'a> for Generator { tagsets } } + +#[cfg(test)] +mod test { + use proptest::prelude::*; + use rand::{rngs::SmallRng, SeedableRng}; + + use crate::dogstatsd::{strings, tags, ConfRange}; + use crate::Generator; + 
use std::rc::Rc; + + // We want to be sure that the generator produces exactly `num_tagsets` + // tagsets, never more and never fewer. + proptest! { + #[test] + fn generator_not_exceed_tagset_max(seed: u64, num_tagsets in 0..100_000) { + let mut rng = SmallRng::seed_from_u64(seed); + let num_tagsets = num_tagsets as usize; + let pool = Rc::new(strings::Pool::with_size(&mut rng, 8_000_000)); + + let generator = tags::Generator { + num_tagsets, + tags_per_msg: ConfRange::Inclusive{min: 0, max: 1_000}, + tag_key_length: ConfRange::Inclusive{min: 1, max: 64 }, + tag_value_length: ConfRange::Inclusive{min: 1, max: 64 }, + str_pool: pool, + }; + let tagsets = generator.generate(&mut rng); + assert!(tagsets.len() == num_tagsets); + } + } +} diff --git a/lading_payload/src/dogstatsd/event.rs b/lading_payload/src/dogstatsd/event.rs index 27823c402..6f5a4f05b 100644 --- a/lading_payload/src/dogstatsd/event.rs +++ b/lading_payload/src/dogstatsd/event.rs @@ -4,11 +4,11 @@ use rand::{distributions::Standard, prelude::Distribution, Rng}; use crate::{common::strings, Generator}; -use super::{choose_or_not_fn, choose_or_not_ref, common}; +use super::{choose_or_not_fn, choose_or_not_ref, common, ConfRange}; #[derive(Debug, Clone)] pub(crate) struct EventGenerator { - pub(crate) title_length_range: Range, + pub(crate) title_length: ConfRange, pub(crate) texts_or_messages_length_range: Range, pub(crate) small_strings_length_range: Range, pub(crate) str_pool: Rc, @@ -22,10 +22,8 @@ impl<'a> Generator<'a> for EventGenerator { where R: rand::Rng + ?Sized, { - let title = self - .str_pool - .of_size_range(&mut rng, self.title_length_range.clone()) - .unwrap(); + let title_sz = self.title_length.sample(&mut rng) as usize; + let title = self.str_pool.of_size(&mut rng, title_sz).unwrap(); let text = self .str_pool .of_size_range(&mut rng, self.texts_or_messages_length_range.clone()) diff --git a/lading_payload/src/dogstatsd/metric.rs b/lading_payload/src/dogstatsd/metric.rs index 4b385a189..b2ccf9e38 100644 
--- a/lading_payload/src/dogstatsd/metric.rs +++ b/lading_payload/src/dogstatsd/metric.rs @@ -1,4 +1,4 @@ -use std::{fmt, ops::Range}; +use std::fmt; use rand::{ distributions::{OpenClosed01, WeightedIndex}, @@ -12,7 +12,7 @@ use tracing::debug; use super::{ choose_or_not_ref, common::{self, NumValueGenerator}, - ValueConf, + ConfRange, ValueConf, }; mod template; @@ -21,7 +21,7 @@ mod template; pub(crate) struct MetricGenerator { pub(crate) container_ids: Vec, pub(crate) templates: Vec, - pub(crate) multivalue_count_range: Range, + pub(crate) multivalue_count: ConfRange, pub(crate) multivalue_pack_probability: f32, pub(crate) num_value_generator: NumValueGenerator, } @@ -30,8 +30,8 @@ impl MetricGenerator { #[allow(clippy::too_many_arguments)] pub(crate) fn new( num_contexts: usize, - name_length_range: Range, - multivalue_count_range: Range, + name_length: ConfRange, + multivalue_count: ConfRange, multivalue_pack_probability: f32, metric_weights: &WeightedIndex, container_ids: Vec, @@ -45,14 +45,11 @@ impl MetricGenerator { { let mut templates = Vec::with_capacity(num_contexts); - assert!(tagsets.len() >= num_contexts); + assert!(tagsets.len() == num_contexts); debug!("Generating metric templates for {} contexts.", num_contexts); for tags in tagsets { - let name = String::from( - str_pool - .of_size_range(&mut rng, name_length_range.clone()) - .unwrap(), - ); + let name_sz = name_length.sample(&mut rng) as usize; + let name = String::from(str_pool.of_size(&mut rng, name_sz).unwrap()); let res = match metric_weights.sample(rng) { 0 => Template::Count(template::Count { name, tags }), @@ -69,7 +66,7 @@ impl MetricGenerator { MetricGenerator { container_ids, templates, - multivalue_count_range, + multivalue_count, multivalue_pack_probability, num_value_generator: NumValueGenerator::new(value_conf), } @@ -94,13 +91,13 @@ impl<'a> Generator<'a> for MetricGenerator { // https://docs.datadoghq.com/metrics/custom_metrics/dogstatsd_metrics_submission/#sample-rates let 
sample_rate = rng.gen(); - let mut values = Vec::with_capacity(self.multivalue_count_range.end as usize); + let mut values = Vec::with_capacity(self.multivalue_count.end() as usize); let value: common::NumValue = self.num_value_generator.generate(&mut rng); values.push(value); let prob: f32 = OpenClosed01.sample(&mut rng); if prob < self.multivalue_pack_probability { - let num_desired_values = rng.gen_range(self.multivalue_count_range.clone()); + let num_desired_values = self.multivalue_count.sample(&mut rng) as usize; for _ in 1..num_desired_values { values.push(self.num_value_generator.generate(&mut rng)); }