From 9ceabe77c00d2cc5d8267e16515115b443358be6 Mon Sep 17 00:00:00 2001 From: Xavier Vello Date: Thu, 26 Oct 2023 17:12:55 +0200 Subject: [PATCH] add more metrics (#35) --- capture/src/billing_limits.rs | 2 +- capture/src/capture.rs | 13 +++++++++++-- capture/src/prometheus.rs | 5 +++++ capture/src/sink.rs | 7 ++++--- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/capture/src/billing_limits.rs b/capture/src/billing_limits.rs index 44a997c..4309c21 100644 --- a/capture/src/billing_limits.rs +++ b/capture/src/billing_limits.rs @@ -51,7 +51,7 @@ pub struct BillingLimiter { limited: Arc>>, redis: Arc, interval: Duration, - updated: Arc>, + updated: Arc>, } impl BillingLimiter { diff --git a/capture/src/capture.rs b/capture/src/capture.rs index 65e64c9..0413d44 100644 --- a/capture/src/capture.rs +++ b/capture/src/capture.rs @@ -10,6 +10,8 @@ use axum::extract::{Query, State}; use axum::http::HeaderMap; use axum_client_ip::InsecureClientIp; use base64::Engine; +use metrics::counter; + use time::OffsetDateTime; use crate::billing_limits::QuotaResource; @@ -50,7 +52,13 @@ pub async fn event( if events.is_empty() { return Err(CaptureError::EmptyBatch); } - let token = extract_and_verify_token(&events)?; + + let token = extract_and_verify_token(&events).map_err(|err| { + counter!("capture_token_shape_invalid_total", events.len() as u64); + err + })?; + + counter!("capture_events_received_total", events.len() as u64); let sent_at = meta.sent_at.and_then(|value| { let value_nanos: i128 = i128::from(value) * 1_000_000; // Assuming the value is in milliseconds, latest posthog-js releases @@ -77,13 +85,14 @@ pub async fn event( .await; if limited { + counter!("capture_events_dropped_over_quota", 1); + // for v0 we want to just return ok 🙃 // this is because the clients are pretty dumb and will just retry over and over and // over... // // for v1, we'll return a meaningful error code and error, so that the clients can do // something meaningful with that error - return Ok(Json(CaptureResponse { status: CaptureResponseCode::Ok, })); diff --git a/capture/src/prometheus.rs b/capture/src/prometheus.rs index 1fcdb7d..0cb4750 100644 --- a/capture/src/prometheus.rs +++ b/capture/src/prometheus.rs @@ -10,6 +10,9 @@ pub fn setup_metrics_recorder() -> PrometheusHandle { const EXPONENTIAL_SECONDS: &[f64] = &[ 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, ]; + const BATCH_SIZES: &[f64] = &[ + 1.0, 10.0, 25.0, 50.0, 75.0, 100.0, 250.0, 500.0, 750.0, 1000.0, + ]; PrometheusBuilder::new() .set_buckets_for_metric( @@ -17,6 +20,8 @@ pub fn setup_metrics_recorder() -> PrometheusHandle { EXPONENTIAL_SECONDS, ) .unwrap() + .set_buckets_for_metric(Matcher::Suffix("_batch_size".to_string()), BATCH_SIZES) + .unwrap() .install_recorder() .unwrap() } diff --git a/capture/src/sink.rs b/capture/src/sink.rs index e6f4b7b..6f0cbef 100644 --- a/capture/src/sink.rs +++ b/capture/src/sink.rs @@ -1,4 +1,5 @@ use async_trait::async_trait; +use metrics::{counter, histogram}; use tokio::task::JoinSet; use crate::api::CaptureError; @@ -20,8 +21,7 @@ pub struct PrintSink {} impl EventSink for PrintSink { async fn send(&self, event: ProcessedEvent) -> Result<(), CaptureError> { tracing::info!("single event: {:?}", event); - - metrics::increment_counter!("capture_events_total"); + counter!("capture_events_ingested_total", 1); Ok(()) } @@ -29,8 +29,9 @@ impl EventSink for PrintSink { let span = tracing::span!(tracing::Level::INFO, "batch of events"); let _enter = span.enter(); + histogram!("capture_event_batch_size", events.len() as f64); + counter!("capture_events_ingested_total", events.len() as u64); for event in events { - metrics::increment_counter!("capture_events_total"); tracing::info!("event: {:?}", event); }