From 6b43b44060e3a76f724fe85aea837e55d2843c1f Mon Sep 17 00:00:00 2001 From: Jacob Pratt Date: Sat, 19 Oct 2024 04:39:42 -0400 Subject: [PATCH] strftime implementation --- tests/parse_format_description.rs | 73 +++ time/src/error/invalid_format_description.rs | 12 +- time/src/format_description/mod.rs | 4 +- time/src/format_description/parse/ast.rs | 4 +- time/src/format_description/parse/lexer.rs | 15 +- time/src/format_description/parse/mod.rs | 23 + time/src/format_description/parse/strftime.rs | 478 ++++++++++++++++++ 7 files changed, 588 insertions(+), 21 deletions(-) create mode 100644 time/src/format_description/parse/strftime.rs diff --git a/tests/parse_format_description.rs b/tests/parse_format_description.rs index d9d7fa777..3d44e4e6a 100644 --- a/tests/parse_format_description.rs +++ b/tests/parse_format_description.rs @@ -5,6 +5,7 @@ use rstest_reuse::{apply, template}; use time::error::InvalidFormatDescription; use time::format_description::modifier::*; use time::format_description::{self, BorrowedFormatItem, Component, OwnedFormatItem}; +use time::macros::format_description; /// Identical to `modifier!`, but obtains the value from `M` automagically. macro_rules! modifier_m { @@ -798,3 +799,75 @@ fn rfc_3339() { ]) ); } + +#[rstest] +#[case("foo", format_description!("foo"))] +#[case("%a", format_description!("[weekday repr:short]"))] +#[case("%A", format_description!("[weekday]"))] +#[case("%b", format_description!("[month repr:short]"))] +#[case("%B", format_description!("[month repr:long]"))] +#[case("%C", format_description!("[year repr:century]"))] +#[case("%d", format_description!("[day]"))] +#[case("%e", format_description!("[day padding:space]"))] +#[case("%g", format_description!("[year repr:last_two base:iso_week]"))] +#[case("%G", format_description!("[year base:iso_week]"))] +#[case("%h", format_description!("[month repr:short]"))] +#[case("%H", format_description!("[hour]"))] +#[case("%I", format_description!("[hour repr:12]"))] +#[case("%j", format_description!("[ordinal]"))] +#[case("%k", format_description!("[hour padding:space]"))] +#[case("%l", format_description!("[hour repr:12 padding:space]"))] +#[case("%m", format_description!("[month]"))] +#[case("%M", format_description!("[minute]"))] +#[case("%n", format_description!("\n"))] +#[case("%p", format_description!("[period]"))] +#[case("%P", format_description!("[period case:lower]"))] +#[case("%s", format_description!("[unix_timestamp]"))] +#[case("%S", format_description!("[second]"))] +#[case("%t", format_description!("\t"))] +#[case("%u", format_description!("[weekday repr:monday]"))] +#[case("%U", format_description!("[week_number repr:sunday]"))] +#[case("%V", format_description!("[week_number]"))] +#[case("%w", format_description!("[weekday repr:sunday]"))] +#[case("%W", format_description!("[week_number repr:monday]"))] +#[case("%y", format_description!("[year repr:last_two]"))] +#[case("%Y", format_description!("[year]"))] +#[case("%%", format_description!("%"))] +fn strftime_equivalence( + #[case] strftime: &str, + #[case] custom: &[BorrowedFormatItem<'_>], +) -> time::Result<()> { + let borrowed = format_description::parse_strftime_borrowed(strftime)?; + let owned = format_description::parse_strftime_owned(strftime)?; + + assert_eq!(borrowed, custom); + assert_eq!(owned, OwnedFormatItem::from(custom)); + + Ok(()) +} + +#[rstest] +#[case( + "%c", + "[weekday repr:short] [month repr:short] [day padding:space] [hour]:[minute]:[second] [year]" +)] +#[case("%D", "[month]/[day]/[year repr:last_two]")] +#[case("%F", "[year]-[month repr:numerical]-[day]")] +#[case("%r", "[hour repr:12]:[minute]:[second] [period]")] +#[case("%R", "[hour]:[minute]")] +#[case("%T", "[hour]:[minute]:[second]")] +#[case("%x", "[month]/[day]/[year repr:last_two]")] +#[case("%X", "[hour]:[minute]:[second]")] +#[case("%z", "[offset_hour sign:mandatory][offset_minute]")] +fn strftime_compound_equivalence(#[case] strftime: &str, #[case] custom: &str) -> time::Result<()> { + let borrowed = format_description::parse_strftime_borrowed(strftime)?; + let owned = format_description::parse_strftime_owned(strftime)?; + let custom = format_description::parse(custom)?; + // Until equality is implemented better, we need to convert to a compound. + let custom = vec![BorrowedFormatItem::Compound(&custom)]; + + assert_eq!(borrowed, custom); + assert_eq!(owned, OwnedFormatItem::from(custom)); + + Ok(()) +} diff --git a/time/src/error/invalid_format_description.rs b/time/src/error/invalid_format_description.rs index b2f5b7bda..f07eeb87c 100644 --- a/time/src/error/invalid_format_description.rs +++ b/time/src/error/invalid_format_description.rs @@ -115,10 +115,14 @@ impl fmt::Display for InvalidFormatDescription { context, index, } => { - write!( - f, - "{what} is not supported in {context} at byte index {index}" - ) + if context.is_empty() { + write!(f, "{what} is not supported at byte index {index}") + } else { + write!( + f, + "{what} is not supported in {context} at byte index {index}" + ) + } } } } diff --git a/time/src/format_description/mod.rs b/time/src/format_description/mod.rs index 4bd7aa5dd..9e7d5202d 100644 --- a/time/src/format_description/mod.rs +++ b/time/src/format_description/mod.rs @@ -24,7 +24,9 @@ pub use owned_format_item::OwnedFormatItem; pub use self::component::Component; #[cfg(feature = "alloc")] -pub use self::parse::{parse, parse_borrowed, parse_owned}; +pub use self::parse::{ + parse, parse_borrowed, parse_owned, parse_strftime_borrowed, parse_strftime_owned, +}; /// Well-known formats, typically standards. pub mod well_known { diff --git a/time/src/format_description/parse/ast.rs b/time/src/format_description/parse/ast.rs index c7fc5e05a..f20386f96 100644 --- a/time/src/format_description/parse/ast.rs +++ b/time/src/format_description/parse/ast.rs @@ -188,7 +188,7 @@ fn parse_component< let Some(name) = tokens.next_if_not_whitespace() else { let span = match leading_whitespace { Some(Spanned { value: _, span }) => span, - None => opening_bracket.to(opening_bracket), + None => opening_bracket.to_self(), }; return Err(Error { _inner: unused(span.error("expected component name")), @@ -277,7 +277,7 @@ fn parse_component< return Err(Error { _inner: unused( location - .to(location) + .to_self() .error("modifier must be of the form `key:value`"), ), public: crate::error::InvalidFormatDescription::InvalidModifier { diff --git a/time/src/format_description/parse/lexer.rs b/time/src/format_description/parse/lexer.rs index 2979fff9e..fec36eb7c 100644 --- a/time/src/format_description/parse/lexer.rs +++ b/time/src/format_description/parse/lexer.rs @@ -2,7 +2,7 @@ use core::iter; -use super::{unused, Error, Location, Spanned, SpannedValue}; +use super::{attach_location, unused, Error, Location, Spanned, SpannedValue}; /// An iterator over the lexed tokens. pub(super) struct Lexed { @@ -130,19 +130,6 @@ pub(super) enum ComponentKind { NotWhitespace, } -/// Attach [`Location`] information to each byte in the iterator. -fn attach_location<'item>( - iter: impl Iterator, -) -> impl Iterator { - let mut byte_pos = 0; - - iter.map(move |byte| { - let location = Location { byte: byte_pos }; - byte_pos += 1; - (byte, location) - }) -} - /// Parse the string into a series of [`Token`]s. /// /// `VERSION` controls the version of the format description that is being parsed. Currently, this diff --git a/time/src/format_description/parse/mod.rs b/time/src/format_description/parse/mod.rs index 602ecf71f..3fe96510f 100644 --- a/time/src/format_description/parse/mod.rs +++ b/time/src/format_description/parse/mod.rs @@ -3,6 +3,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; +pub use self::strftime::{parse_strftime_borrowed, parse_strftime_owned}; use crate::{error, format_description}; /// A helper macro to make version restrictions simpler to read and write. @@ -23,6 +24,7 @@ macro_rules! validate_version { mod ast; mod format_item; mod lexer; +mod strftime; /// A struct that is used to ensure that the version is valid. struct Version; @@ -84,6 +86,19 @@ pub fn parse_owned( Ok(items.into()) } +/// Attach [`Location`] information to each byte in the iterator. +fn attach_location<'item>( + iter: impl Iterator, +) -> impl Iterator { + let mut byte_pos = 0; + + iter.map(move |byte| { + let location = Location { byte: byte_pos }; + byte_pos += 1; + (byte, location) + }) +} + /// A location within a string. #[derive(Clone, Copy)] struct Location { @@ -97,6 +112,14 @@ impl Location { Span { start: self, end } } + /// Create a new [`Span`] consisting entirely of `self`. + const fn to_self(self) -> Span { + Span { + start: self, + end: self, + } + } + /// Offset the location by the provided amount. /// /// Note that this assumes the resulting location is on the same line as the original location. diff --git a/time/src/format_description/parse/strftime.rs b/time/src/format_description/parse/strftime.rs new file mode 100644 index 000000000..69c7786c6 --- /dev/null +++ b/time/src/format_description/parse/strftime.rs @@ -0,0 +1,478 @@ +use alloc::string::String; +use alloc::vec::Vec; +use core::iter; + +use crate::error::InvalidFormatDescription; +use crate::format_description::parse::{ + attach_location, unused, Error, ErrorInner, Location, Spanned, SpannedValue, Unused, +}; +use crate::format_description::{self, modifier, BorrowedFormatItem, Component}; + +/// Parse a sequence of items from the [`strftime` format description][strftime docs]. +/// +/// The only heap allocation required is for the `Vec` itself. All components are bound to the +/// lifetime of the input. +/// +/// [strftime docs]: https://man7.org/linux/man-pages/man3/strftime.3.html +#[doc(alias = "parse_strptime_borrowed")] +pub fn parse_strftime_borrowed( + s: &str, +) -> Result>, InvalidFormatDescription> { + let tokens = lex(s.as_bytes()); + let items = into_items(tokens).collect::>()?; + Ok(items) +} + +/// Parse a sequence of items from the [`strftime` format description][strftime docs]. +/// +/// This requires heap allocation for some owned items. +/// +/// [strftime docs]: https://man7.org/linux/man-pages/man3/strftime.3.html +#[doc(alias = "parse_strptime_owned")] +pub fn parse_strftime_owned( + s: &str, +) -> Result { + parse_strftime_borrowed(s).map(Into::into) +} + +#[derive(Debug, Clone, Copy, PartialEq)] +enum Padding { + /// The default padding for a numeric component. Indicated by no character. + Default, + /// Pad a numeric component with spaces. Indicated by an underscore. + Spaces, + /// Do not pad a numeric component. Indicated by a hyphen. + None, + /// Pad a numeric component with zeroes. Indicated by a zero. + Zeroes, +} + +enum Token<'a> { + Literal(Spanned<&'a [u8]>), + Component { + _percent: Unused, + padding: Spanned, + component: Spanned, + }, +} + +fn lex(mut input: &[u8]) -> iter::Peekable, Error>>> { + let mut iter = attach_location(input.iter()).peekable(); + + iter::from_fn(move || { + Some(Ok(match iter.next()? { + (b'%', percent_loc) => match iter.next() { + Some((padding @ (b'_' | b'-' | b'0'), padding_loc)) => { + let padding = match padding { + b'_' => Padding::Spaces, + b'-' => Padding::None, + b'0' => Padding::Zeroes, + _ => unreachable!(), + }; + let (&component, component_loc) = iter.next()?; + input = &input[3..]; + Token::Component { + _percent: unused(percent_loc), + padding: padding.spanned(padding_loc.to_self()), + component: component.spanned(component_loc.to_self()), + } + } + Some((&component, component_loc)) => { + input = &input[2..]; + let span = component_loc.to_self(); + Token::Component { + _percent: unused(percent_loc), + padding: Padding::Default.spanned(span), + component: component.spanned(span), + } + } + None => { + return Some(Err(Error { + _inner: unused(percent_loc.error("unexpected end of input")), + public: InvalidFormatDescription::Expected { + what: "valid escape sequence", + index: percent_loc.byte as _, + }, + })); + } + }, + (_, start_location) => { + let mut bytes = 1; + let mut end_location = start_location; + + while let Some((_, location)) = iter.next_if(|&(&byte, _)| byte != b'%') { + end_location = location; + bytes += 1; + } + + let value = &input[..bytes]; + input = &input[bytes..]; + + Token::Literal(value.spanned(start_location.to(end_location))) + } + })) + }) + .peekable() +} + +fn into_items<'iter, 'token: 'iter>( + mut tokens: iter::Peekable, Error>> + 'iter>, +) -> impl Iterator, Error>> + 'iter { + iter::from_fn(move || { + let next = match tokens.next()? { + Ok(token) => token, + Err(err) => return Some(Err(err)), + }; + + Some(match next { + Token::Literal(spanned) => Ok(BorrowedFormatItem::Literal(*spanned)), + Token::Component { + _percent, + padding, + component, + } => parse_component(padding, component), + }) + }) +} + +fn parse_component( + padding: Spanned, + component: Spanned, +) -> Result, Error> { + let padding_or_default = |padding: Padding, default| match padding { + Padding::Default => default, + Padding::Spaces => modifier::Padding::Space, + Padding::None => modifier::Padding::None, + Padding::Zeroes => modifier::Padding::Zero, + }; + + /// Helper macro to create a component. + macro_rules! component { + ($name:ident { $($inner:tt)* }) => { + BorrowedFormatItem::Component(Component::$name(modifier::$name { + $($inner)* + })) + } + } + + Ok(match *component { + b'%' => BorrowedFormatItem::Literal(b"%"), + b'a' => component!(Weekday { + repr: modifier::WeekdayRepr::Short, + one_indexed: true, + case_sensitive: true, + }), + b'A' => component!(Weekday { + repr: modifier::WeekdayRepr::Long, + one_indexed: true, + case_sensitive: true, + }), + b'b' | b'h' => component!(Month { + repr: modifier::MonthRepr::Short, + padding: modifier::Padding::Zero, + case_sensitive: true, + }), + b'B' => component!(Month { + repr: modifier::MonthRepr::Long, + padding: modifier::Padding::Zero, + case_sensitive: true, + }), + b'c' => BorrowedFormatItem::Compound(&[ + component!(Weekday { + repr: modifier::WeekdayRepr::Short, + one_indexed: true, + case_sensitive: true, + }), + BorrowedFormatItem::Literal(b" "), + component!(Month { + repr: modifier::MonthRepr::Short, + padding: modifier::Padding::Zero, + case_sensitive: true, + }), + BorrowedFormatItem::Literal(b" "), + component!(Day { + padding: modifier::Padding::Space + }), + BorrowedFormatItem::Literal(b" "), + component!(Hour { + padding: modifier::Padding::Zero, + is_12_hour_clock: false, + }), + BorrowedFormatItem::Literal(b":"), + component!(Minute { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b":"), + component!(Second { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b" "), + component!(Year { + padding: modifier::Padding::Zero, + repr: modifier::YearRepr::Full, + iso_week_based: false, + sign_is_mandatory: false, + }), + ]), + b'C' => component!(Year { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::YearRepr::Century, + iso_week_based: false, + sign_is_mandatory: false, + }), + b'd' => component!(Day { + padding: padding_or_default(*padding, modifier::Padding::Zero), + }), + b'D' => BorrowedFormatItem::Compound(&[ + component!(Month { + repr: modifier::MonthRepr::Numerical, + padding: modifier::Padding::Zero, + case_sensitive: true, + }), + BorrowedFormatItem::Literal(b"/"), + component!(Day { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b"/"), + component!(Year { + padding: modifier::Padding::Zero, + repr: modifier::YearRepr::LastTwo, + iso_week_based: false, + sign_is_mandatory: false, + }), + ]), + b'e' => component!(Day { + padding: padding_or_default(*padding, modifier::Padding::Space), + }), + b'F' => BorrowedFormatItem::Compound(&[ + component!(Year { + padding: modifier::Padding::Zero, + repr: modifier::YearRepr::Full, + iso_week_based: false, + sign_is_mandatory: false, + }), + BorrowedFormatItem::Literal(b"-"), + component!(Month { + padding: modifier::Padding::Zero, + repr: modifier::MonthRepr::Numerical, + case_sensitive: true, + }), + BorrowedFormatItem::Literal(b"-"), + component!(Day { + padding: modifier::Padding::Zero, + }), + ]), + b'g' => component!(Year { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::YearRepr::LastTwo, + iso_week_based: true, + sign_is_mandatory: false, + }), + b'G' => component!(Year { + padding: modifier::Padding::Zero, + repr: modifier::YearRepr::Full, + iso_week_based: true, + sign_is_mandatory: false, + }), + b'H' => component!(Hour { + padding: padding_or_default(*padding, modifier::Padding::Zero), + is_12_hour_clock: false, + }), + b'I' => component!(Hour { + padding: padding_or_default(*padding, modifier::Padding::Zero), + is_12_hour_clock: true, + }), + b'j' => component!(Ordinal { + padding: padding_or_default(*padding, modifier::Padding::Zero), + }), + b'k' => component!(Hour { + padding: padding_or_default(*padding, modifier::Padding::Space), + is_12_hour_clock: false, + }), + b'l' => component!(Hour { + padding: padding_or_default(*padding, modifier::Padding::Space), + is_12_hour_clock: true, + }), + b'm' => component!(Month { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::MonthRepr::Numerical, + case_sensitive: true, + }), + b'M' => component!(Minute { + padding: padding_or_default(*padding, modifier::Padding::Zero), + }), + b'n' => BorrowedFormatItem::Literal(b"\n"), + b'O' => { + return Err(Error { + _inner: unused(ErrorInner { + _message: "unsupported modifier", + _span: component.span, + }), + public: InvalidFormatDescription::NotSupported { + what: "modifier", + context: "", + index: component.span.start.byte as _, + }, + }) + } + b'p' => component!(Period { + is_uppercase: true, + case_sensitive: true + }), + b'P' => component!(Period { + is_uppercase: false, + case_sensitive: true + }), + b'r' => BorrowedFormatItem::Compound(&[ + component!(Hour { + padding: modifier::Padding::Zero, + is_12_hour_clock: true, + }), + BorrowedFormatItem::Literal(b":"), + component!(Minute { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b":"), + component!(Second { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b" "), + component!(Period { + is_uppercase: true, + case_sensitive: true, + }), + ]), + b'R' => BorrowedFormatItem::Compound(&[ + component!(Hour { + padding: modifier::Padding::Zero, + is_12_hour_clock: false, + }), + BorrowedFormatItem::Literal(b":"), + component!(Minute { + padding: modifier::Padding::Zero, + }), + ]), + b's' => component!(UnixTimestamp { + precision: modifier::UnixTimestampPrecision::Second, + sign_is_mandatory: false, + }), + b'S' => component!(Second { + padding: padding_or_default(*padding, modifier::Padding::Zero), + }), + b't' => BorrowedFormatItem::Literal(b"\t"), + b'T' => BorrowedFormatItem::Compound(&[ + component!(Hour { + padding: modifier::Padding::Zero, + is_12_hour_clock: false, + }), + BorrowedFormatItem::Literal(b":"), + component!(Minute { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b":"), + component!(Second { + padding: modifier::Padding::Zero, + }), + ]), + b'u' => component!(Weekday { + repr: modifier::WeekdayRepr::Monday, + one_indexed: true, + case_sensitive: true, + }), + b'U' => component!(WeekNumber { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::WeekNumberRepr::Sunday, + }), + b'V' => component!(WeekNumber { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::WeekNumberRepr::Iso, + }), + b'w' => component!(Weekday { + repr: modifier::WeekdayRepr::Sunday, + one_indexed: true, + case_sensitive: true, + }), + b'W' => component!(WeekNumber { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::WeekNumberRepr::Monday, + }), + b'x' => BorrowedFormatItem::Compound(&[ + component!(Month { + repr: modifier::MonthRepr::Numerical, + padding: modifier::Padding::Zero, + case_sensitive: true, + }), + BorrowedFormatItem::Literal(b"/"), + component!(Day { + padding: modifier::Padding::Zero + }), + BorrowedFormatItem::Literal(b"/"), + component!(Year { + padding: modifier::Padding::Zero, + repr: modifier::YearRepr::LastTwo, + iso_week_based: false, + sign_is_mandatory: false, + }), + ]), + b'X' => BorrowedFormatItem::Compound(&[ + component!(Hour { + padding: modifier::Padding::Zero, + is_12_hour_clock: false, + }), + BorrowedFormatItem::Literal(b":"), + component!(Minute { + padding: modifier::Padding::Zero, + }), + BorrowedFormatItem::Literal(b":"), + component!(Second { + padding: modifier::Padding::Zero, + }), + ]), + b'y' => component!(Year { + padding: padding_or_default(*padding, modifier::Padding::Zero), + repr: modifier::YearRepr::LastTwo, + iso_week_based: false, + sign_is_mandatory: false, + }), + b'Y' => component!(Year { + padding: modifier::Padding::Zero, + repr: modifier::YearRepr::Full, + iso_week_based: false, + sign_is_mandatory: false, + }), + b'z' => BorrowedFormatItem::Compound(&[ + component!(OffsetHour { + sign_is_mandatory: true, + padding: modifier::Padding::Zero, + }), + component!(OffsetMinute { + padding: modifier::Padding::Zero, + }), + ]), + b'Z' => { + return Err(Error { + _inner: unused(ErrorInner { + _message: "unsupported component", + _span: component.span, + }), + public: InvalidFormatDescription::NotSupported { + what: "component", + context: "", + index: component.span.start.byte as _, + }, + }) + } + _ => { + return Err(Error { + _inner: unused(ErrorInner { + _message: "invalid component", + _span: component.span, + }), + public: InvalidFormatDescription::InvalidComponentName { + name: String::from_utf8_lossy(&[*component]).into_owned(), + index: component.span.start.byte as _, + }, + }) + } + }) +}