From 8f2102665949c7b87bd49ea82854c1d3306ae0cc Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 29 Mar 2024 05:36:09 -0700 Subject: [PATCH] Handle missing year in date time parser (#4811) --- .../quickwit-datetime/src/date_time_format.rs | 48 ++++++++++++++++++- .../src/date_time_parsing.rs | 8 ++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-datetime/src/date_time_format.rs b/quickwit/quickwit-datetime/src/date_time_format.rs index 2b04afa79e6..42b282ef6db 100644 --- a/quickwit/quickwit-datetime/src/date_time_format.rs +++ b/quickwit/quickwit-datetime/src/date_time_format.rs @@ -28,7 +28,7 @@ use time::error::Format; use time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339}; use time::format_description::FormatItem; use time::parsing::Parsed; -use time::{OffsetDateTime, PrimitiveDateTime}; +use time::{Month, OffsetDateTime, PrimitiveDateTime}; use time_fmt::parse::time_format_item::parse_to_format_item; use crate::TantivyDateTime; @@ -84,6 +84,11 @@ impl StrptimeParser { parsed.set_minute(0u8); parsed.set_second(0u8); } + if parsed.year().is_none() { + let now = OffsetDateTime::now_utc(); + let year = infer_year(parsed.month(), now.month(), now.year()); + parsed.set_year(year); + } let date_time = parsed.try_into()?; Ok(date_time) } @@ -318,9 +323,26 @@ impl<'de> Deserialize<'de> for DateTimeOutputFormat { } } +/// Infers the year of a parsed date time. It assumes that events appear more often delayed than in +/// the future and, as a result, skews towards the past year. 
+pub(super) fn infer_year( + parsed_month_opt: Option<Month>, + this_month: Month, + this_year: i32, +) -> i32 { + let Some(parsed_month) = parsed_month_opt else { + return this_year; + }; + if parsed_month as u8 > this_month as u8 + 3 { + return this_year - 1; + } + this_year +} + #[cfg(test)] mod tests { use time::macros::datetime; + use time::Month; use super::*; @@ -453,4 +475,28 @@ mod tests { "datetime string `2021-01-01TABC` does not match strptime format `%Y-%m-%d`" ); } + + #[test] + fn test_infer_year() { + let inferred_year = infer_year(None, Month::January, 2024); + assert_eq!(inferred_year, 2024); + + let inferred_year = infer_year(Some(Month::December), Month::January, 2024); + assert_eq!(inferred_year, 2023); + + let inferred_year = infer_year(Some(Month::January), Month::January, 2024); + assert_eq!(inferred_year, 2024); + + let inferred_year = infer_year(Some(Month::February), Month::January, 2024); + assert_eq!(inferred_year, 2024); + + let inferred_year = infer_year(Some(Month::March), Month::January, 2024); + assert_eq!(inferred_year, 2024); + + let inferred_year = infer_year(Some(Month::April), Month::January, 2024); + assert_eq!(inferred_year, 2024); + + let inferred_year = infer_year(Some(Month::May), Month::January, 2024); + assert_eq!(inferred_year, 2023); + } } diff --git a/quickwit/quickwit-datetime/src/date_time_parsing.rs b/quickwit/quickwit-datetime/src/date_time_parsing.rs index b048efcbaaf..34b9b62e6bb 100644 --- a/quickwit/quickwit-datetime/src/date_time_parsing.rs +++ b/quickwit/quickwit-datetime/src/date_time_parsing.rs @@ -182,8 +182,10 @@ mod tests { use std::str::FromStr; use time::macros::datetime; + use time::Month; use super::*; + use crate::date_time_format::infer_year; use crate::StrptimeParser; #[test] @@ -247,6 +249,12 @@ mod tests { "2024-01-31 18:40:19.950188123", datetime!(2024-01-31 18:40:19.950188123 UTC), ), + ("%b %d %H:%M:%S", "Mar 6 17:40:02", { + let dt = datetime!(1900-03-06 17:40:02 UTC); + let now = 
OffsetDateTime::now_utc(); + let year = infer_year(Some(Month::March), now.month(), now.year()); + dt.replace_year(year).unwrap() + }), ]; for (fmt, date_time_str, expected) in test_data { let parser = StrptimeParser::from_str(fmt).unwrap();