diff --git a/Cargo.toml b/Cargo.toml
index 629999aa..b84bb72c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,6 +71,7 @@ parking_lot = { version = "0.12.0", optional = true }
 thiserror = "1.0.15"
 anyhow = "1.0.28"
 derivative = "2.2"
+unicode-segmentation = "1.10.0"

 [target.'cfg(windows)'.dependencies]
 winapi = { version = "0.3", optional = true, features = ["handleapi", "minwindef", "processenv", "winbase", "wincon"] }
diff --git a/src/encode/pattern/mod.rs b/src/encode/pattern/mod.rs
index 5215f2ec..bfede0bc 100644
--- a/src/encode/pattern/mod.rs
+++ b/src/encode/pattern/mod.rs
@@ -12,11 +12,12 @@
 //! name := identifier
 //! argument := format_string
 //!
-//! format_spec := [ [ fill ] align ] [ min_width ] [ '.' max_width ]
+//! format_spec := [ [ fill ] align ] [ left_truncate ] [ min_width ] [ '.' max_width ]
 //! fill := character
 //! align := '<' | '>'
 //! min_width := number
 //! max_width := number
+//! left_truncate := '-'
 //! ```
 //!
 //! # Special characters
@@ -101,6 +102,10 @@
 //! configured. Any output over the maximum length will be truncated, and
 //! output under the minimum length will be padded (see above).
 //!
+//! Truncation will cut the right end of the contents, unless left truncation
+//! is specified (with a minus sign). Left/right truncation and left/right
+//! alignment are specified independently.
+//!
 //! # Examples
 //!
 //! The default pattern is `{d} {l} {t} - {m}{n}` which produces output like
@@ -119,12 +124,17 @@
 //! <code>INFO hello&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</code>, while the message `hello, world!` and log
 //! level `DEBUG` will be truncated to `DEBUG hello, wo`.
 //!
+//! The pattern `{({l} {m}):-15.15}` will behave as above, except the truncation
+//! will be from the left. For example, at `DEBUG` level, and a message of
+//! `hello, world!`, the output will be: `G hello, world!`
+//!
 //! [MDC]: https://crates.io/crates/log-mdc

 use chrono::{Local, Utc};
 use derivative::Derivative;
 use log::{Level, Record};
-use std::{default::Default, io, process, thread};
+use std::{default::Default, io, mem, process, thread};
+use unicode_segmentation::{GraphemeCursor, UnicodeSegmentation};

 use crate::encode::{
     self,
@@ -150,157 +160,6 @@ pub struct PatternEncoderConfig {
     pattern: Option<String>,
 }

-fn is_char_boundary(b: u8) -> bool {
-    b as i8 >= -0x40
-}
-
-fn char_starts(buf: &[u8]) -> usize {
-    buf.iter().filter(|&&b| is_char_boundary(b)).count()
-}
-
-struct MaxWidthWriter<'a> {
-    remaining: usize,
-    w: &'a mut dyn encode::Write,
-}
-
-impl<'a> io::Write for MaxWidthWriter<'a> {
-    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        let mut remaining = self.remaining;
-        let mut end = buf.len();
-        for (idx, _) in buf
-            .iter()
-            .enumerate()
-            .filter(|&(_, &b)| is_char_boundary(b))
-        {
-            if remaining == 0 {
-                end = idx;
-                break;
-            }
-            remaining -= 1;
-        }
-
-        // we don't want to report EOF, so just act as a sink past this point
-        if end == 0 {
-            return Ok(buf.len());
-        }
-
-        let buf = &buf[..end];
-        match self.w.write(buf) {
-            Ok(len) => {
-                if len == end {
-                    self.remaining = remaining;
-                } else {
-                    self.remaining -= char_starts(&buf[..len]);
-                }
-                Ok(len)
-            }
-            Err(e) => Err(e),
-        }
-    }
-
-    fn flush(&mut self) -> io::Result<()> {
-        self.w.flush()
-    }
-}
-
-impl<'a> encode::Write for MaxWidthWriter<'a> {
-    fn set_style(&mut self, style: &Style) -> io::Result<()> {
-        self.w.set_style(style)
-    }
-}
-
-struct LeftAlignWriter<W> {
-    to_fill: usize,
-    fill: char,
-    w: W,
-}
-
-impl<W: encode::Write> LeftAlignWriter<W> {
-    fn finish(mut self) -> io::Result<()> {
-        for _ in 0..self.to_fill {
-            write!(self.w, "{}", self.fill)?;
-        }
-        Ok(())
-    }
-}
-
-impl<W: encode::Write> io::Write for LeftAlignWriter<W> {
-    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        match self.w.write(buf) {
-            Ok(len) => {
-                self.to_fill = self.to_fill.saturating_sub(char_starts(&buf[..len]));
-                Ok(len)
-            }
-            Err(e) => Err(e),
-        }
-    }
-
-    fn flush(&mut self) -> io::Result<()> {
-        self.w.flush()
-    }
-}
-
-impl<W: encode::Write> encode::Write for LeftAlignWriter<W> {
-    fn set_style(&mut self, style: &Style) -> io::Result<()> {
-        self.w.set_style(style)
-    }
-}
-
-enum BufferedOutput {
-    Data(Vec<u8>),
-    Style(Style),
-}
-
-struct RightAlignWriter<W> {
-    to_fill: usize,
-    fill: char,
-    w: W,
-    buf: Vec<BufferedOutput>,
-}
-
-impl<W: encode::Write> RightAlignWriter<W> {
-    fn finish(mut self) -> io::Result<()> {
-        for _ in 0..self.to_fill {
-            write!(self.w, "{}", self.fill)?;
-        }
-        for out in self.buf {
-            match out {
-                BufferedOutput::Data(ref buf) => self.w.write_all(buf)?,
-                BufferedOutput::Style(ref style) => self.w.set_style(style)?,
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<W: encode::Write> io::Write for RightAlignWriter<W> {
-    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        self.to_fill = self.to_fill.saturating_sub(char_starts(buf));
-
-        let mut pushed = false;
-        if let Some(&mut BufferedOutput::Data(ref mut data)) = self.buf.last_mut() {
-            data.extend_from_slice(buf);
-            pushed = true;
-        };
-
-        if !pushed {
-            self.buf.push(BufferedOutput::Data(buf.to_owned()));
-        }
-        Ok(buf.len())
-    }
-
-    fn flush(&mut self) -> io::Result<()> {
-        Ok(())
-    }
-}
-
-impl<W: encode::Write> encode::Write for RightAlignWriter<W> {
-    fn set_style(&mut self, style: &Style) -> io::Result<()> {
-        self.buf.push(BufferedOutput::Style(style.clone()));
-        Ok(())
-    }
-}
-
 #[derive(Clone, Eq, PartialEq, Hash, Debug)]
 enum Chunk {
     Text(String),
@@ -318,58 +177,12 @@ impl Chunk {
             Chunk::Formatted {
                 ref chunk,
                 ref params,
-            } => match (params.min_width, params.max_width, params.align) {
-                (None, None, _) => chunk.encode(w, record),
-                (None, Some(max_width), _) => {
-                    let mut w = MaxWidthWriter {
-                        remaining: max_width,
-                        w,
-                    };
-                    chunk.encode(&mut w, record)
-                }
-                (Some(min_width), None, Alignment::Left) => {
-                    let mut w = LeftAlignWriter {
-                        to_fill: min_width,
-                        fill: params.fill,
-                        w,
-                    };
-                    chunk.encode(&mut w, record)?;
-                    w.finish()
-                }
-                (Some(min_width), None, Alignment::Right) => {
-                    let mut w = RightAlignWriter {
-                        to_fill: min_width,
-                        fill: params.fill,
-                        w,
-                        buf: vec![],
-                    };
-                    chunk.encode(&mut w, record)?;
-                    w.finish()
-                }
-                (Some(min_width), Some(max_width), Alignment::Left) => {
-                    let mut w = LeftAlignWriter {
-                        to_fill: min_width,
-                        fill: params.fill,
-                        w: MaxWidthWriter {
-                            remaining: max_width,
-                            w,
-                        },
-                    };
-                    chunk.encode(&mut w, record)?;
-                    w.finish()
-                }
-                (Some(min_width), Some(max_width), Alignment::Right) => {
-                    let mut w = RightAlignWriter {
-                        to_fill: min_width,
-                        fill: params.fill,
-                        w: MaxWidthWriter {
-                            remaining: max_width,
-                            w,
-                        },
-                        buf: vec![],
-                    };
+            } => match (params.min_width, params.max_width) {
+                (None, None) => chunk.encode(w, record),
+                _ => {
+                    let mut w = StringBasedWriter::new(w, params);
                     chunk.encode(&mut w, record)?;
-                    w.finish()
+                    w.chunk_end()
                 }
             },
             Chunk::Error(ref s) => write!(w, "{{ERROR: {}}}", s),
@@ -560,6 +373,197 @@ impl<'a> From<Piece<'a>> for Chunk {
     }
 }

+/// A buffered piece of a formatted chunk: either text or a style change.
+enum StringOrStyle {
+    /// Text, with its length in grapheme clusters cached in `glen`.
+    String { glen: usize, s: String },
+    /// A style change that was requested between two pieces of text.
+    Style(Style),
+}
+
+/// Buffers everything a chunk writes so that padding and truncation can be
+/// applied, counted in grapheme clusters, once the full width is known.
+struct StringBasedWriter<'writer, 'params> {
+    /// Bytes written since the last style change.
+    buf: Vec<u8>,
+    /// Buffered text and style changes, in the order they were received.
+    strings_and_styles: Vec<StringOrStyle>,
+    /// Destination that receives the final output in `chunk_end`.
+    w: &'writer mut dyn encode::Write,
+    /// Fill, alignment, truncation side and width limits for this chunk.
+    params: &'params Parameters,
+}
+
+impl encode::Write for StringBasedWriter<'_, '_> {
+    /// Records the style change instead of applying it right away.
+    fn set_style(&mut self, style: &Style) -> io::Result<()> {
+        // Close the pending text first so the style keeps its position.
+        self.push_string();
+        self.strings_and_styles
+            .push(StringOrStyle::Style(style.clone()));
+        Ok(())
+    }
+}
+
+impl io::Write for StringBasedWriter<'_, '_> {
+    /// Buffers `buf`; nothing reaches the wrapped writer before `chunk_end`.
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        self.buf.extend_from_slice(buf);
+        Ok(buf.len())
+    }
+
+    /// Does nothing: the buffered output is only emitted by `chunk_end`.
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl<'writer, 'params> StringBasedWriter<'writer, 'params> {
+    /// Creates an empty writer that outputs to `w` according to `params`.
+    fn new(w: &'writer mut dyn encode::Write, params: &'params Parameters) -> Self {
+        StringBasedWriter {
+            buf: Vec::new(),
+            strings_and_styles: Vec::new(),
+            w,
+            params,
+        }
+    }
+
+    /// Moves the pending bytes, if any, into `strings_and_styles` as text.
+    fn push_string(&mut self) {
+        if !self.buf.is_empty() {
+            let old_buf = mem::take(&mut self.buf);
+            // Invalid UTF-8 is replaced rather than rejected.
+            let s = String::from_utf8_lossy(&old_buf[..]).into_owned();
+            let glen = s.graphemes(true).count();
+            self.strings_and_styles
+                .push(StringOrStyle::String { glen, s });
+        }
+    }
+
+    /// Writes the buffered chunk to the wrapped writer: the text is first cut
+    /// down to `max_width` graphemes, then padded up to `min_width`.
+    fn chunk_end(&mut self) -> io::Result<()> {
+        self.push_string();
+        let total_width = self.compute_width();
+        let kept_width = self.params.max_width.map_or(total_width, |max| total_width.min(max));
+        // Pad relative to what is actually written, so that a `min_width`
+        // larger than `max_width` pads the truncated text instead of writing
+        // the content a second time.
+        let padding = self.params.min_width.map_or(0, |min| min.saturating_sub(kept_width));
+
+        if self.params.align == Alignment::Right {
+            self.output_padding(padding)?;
+        }
+        if kept_width == total_width {
+            self.output_everything()?;
+        } else if self.params.right_truncate {
+            self.output_right_truncate(kept_width)?;
+        } else {
+            self.output_left_truncate(total_width, kept_width)?;
+        }
+        if self.params.align == Alignment::Left {
+            self.output_padding(padding)?;
+        }
+        Ok(())
+    }
+
+    /// Returns the total width of the buffered text, in graphemes.
+    fn compute_width(&self) -> usize {
+        self.strings_and_styles
+            .iter()
+            .map(|x| match x {
+                StringOrStyle::String { glen, .. } => *glen,
+                StringOrStyle::Style(_) => 0,
+            })
+            .sum()
+    }
+
+    /// Writes the last `max_width` graphemes of the buffered text, together
+    /// with every style change.
+    fn output_left_truncate(&mut self, total_width: usize, max_width: usize) -> io::Result<()> {
+        let mut to_cut = total_width.saturating_sub(max_width);
+        for x in &self.strings_and_styles {
+            match x {
+                StringOrStyle::String { glen, s } => {
+                    if to_cut == 0 {
+                        self.w.write_all(s.as_bytes())?;
+                    } else if *glen <= to_cut {
+                        to_cut -= glen;
+                    } else {
+                        let start = Self::boundary_or(s, to_cut, 0);
+                        self.w.write_all(&s.as_bytes()[start..])?;
+                        to_cut = 0;
+                    }
+                }
+                StringOrStyle::Style(s) => self.w.set_style(s)?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Returns the byte offset in `s` just after its first `count` graphemes,
+    /// or `or` if `s` has fewer than `count` graphemes.
+    fn boundary_or(s: &str, count: usize, or: usize) -> usize {
+        let mut cursor = GraphemeCursor::new(0, s.len(), true);
+        let mut start = 0;
+        for _ in 0..count {
+            match cursor.next_boundary(s, 0) {
+                Ok(Some(x)) => start = x,
+                // This should never happen, as callers only cut inside a string
+                // that is longer than `count` graphemes, but we don't assume so:
+                // we use the fallback, which makes callers conservatively output
+                // everything instead of trying to cut.
+                _ => return or,
+            }
+        }
+        start
+    }
+
+    /// Writes the first `max_width` graphemes of the buffered text, together
+    /// with every style change.
+    fn output_right_truncate(&mut self, mut max_width: usize) -> io::Result<()> {
+        for x in &self.strings_and_styles {
+            match x {
+                // Out of room: drop the text, but keep going so that the style
+                // changes that follow (such as one resetting a colour) are
+                // still applied.
+                StringOrStyle::String { .. } if max_width == 0 => {}
+                StringOrStyle::String { glen, s } if *glen <= max_width => {
+                    self.w.write_all(s.as_bytes())?;
+                    max_width -= glen;
+                }
+                StringOrStyle::String { s, .. } => {
+                    let end = Self::boundary_or(s, max_width, s.len());
+                    self.w.write_all(&s.as_bytes()[..end])?;
+                    max_width = 0;
+                }
+                StringOrStyle::Style(s) => self.w.set_style(s)?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes all buffered text and style changes unmodified.
+    fn output_everything(&mut self) -> io::Result<()> {
+        for x in &self.strings_and_styles {
+            match x {
+                StringOrStyle::String { s, .. } => self.w.write_all(s.as_bytes())?,
+                StringOrStyle::Style(s) => self.w.set_style(s)?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes the fill character `len` times.
+    fn output_padding(&mut self, len: usize) -> io::Result<()> {
+        for _ in 0..len {
+            write!(self.w, "{}", self.params.fill)?;
+        }
+        Ok(())
+    }
+}
+
 fn no_args(arg: &[Vec<Piece>], params: Parameters, chunk: FormattedChunk) -> Chunk {
     if arg.is_empty() {
         Chunk::Formatted { chunk, params }
@@ -913,38 +917,90 @@ mod tests {
         assert_eq!(buf, b"foobar");
     }

-    #[test]
     #[cfg(feature = "simple_writer")]
-    fn left_align_formatter() {
-        let pw = PatternEncoder::new("{({l} {m}):15}");
+    fn assert_info_message(pattern: &str, msg: &str, expected: &[u8]) {
+        let pw = PatternEncoder::new(pattern);

         let mut buf = vec![];
         pw.encode(
             &mut SimpleWriter(&mut buf),
             &Record::builder()
                 .level(Level::Info)
-                .args(format_args!("foobar!"))
+                .args(format_args!("{}", msg))
                 .build(),
         )
         .unwrap();
-        assert_eq!(buf, b"INFO foobar!   ");
+        assert_eq!(buf, expected);
+    }
+
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn left_align_formatter() {
+        assert_info_message("{({l} {m}):15}", "foobar!", b"INFO foobar!   ");
+        assert_info_message("{({l} {m}):7}", "foobar!", b"INFO foobar!");
+    }
+
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn right_truncate_formatter() {
+        assert_info_message("{({l} {m}):7.7}", "foobar!", b"INFO fo");
+        assert_info_message("{({l} {m}):12.12}", "foobar!", b"INFO foobar!");
+        assert_info_message("{({l} {m}):7.14}", "foobar!", b"INFO foobar!");
+    }
+
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn left_truncate_formatter() {
+        assert_info_message("{({l} {m}):-9.9}", "foobar!", b"O foobar!");
+        assert_info_message("{({l} {m}):-12.12}", "foobar!", b"INFO foobar!");
+        assert_info_message("{({l} {m}):-7.14}", "foobar!", b"INFO foobar!");
     }

     #[test]
     #[cfg(feature = "simple_writer")]
     fn right_align_formatter() {
-        let pw = PatternEncoder::new("{({l} {m}):>15}");
+        assert_info_message("{({l} {m}):>15}", "foobar!", b"   INFO foobar!");
+        assert_info_message("{({l} {m}):>12}", "foobar!", b"INFO foobar!");
+        assert_info_message("{({l} {m}):>7}", "foobar!", b"INFO foobar!");
+    }

-        let mut buf = vec![];
-        pw.encode(
-            &mut SimpleWriter(&mut buf),
-            &Record::builder()
-                .level(Level::Info)
-                .args(format_args!("foobar!"))
-                .build(),
-        )
-        .unwrap();
-        assert_eq!(buf, b"   INFO foobar!");
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn right_align_formatter_hard_unicode() {
+        assert_info_message(
+            "{({l} {m}):>15}",
+            "\u{01f5}\u{0067}\u{0301}",
+            "        INFO \u{01f5}\u{0067}\u{0301}".as_bytes(),
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn zalgo_text() {
+        let zalgo = "m\u{0301}\u{0302}o\u{0303}\u{0304}\u{0305}\u{0306}re testing l\u{113}ss \u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4} CVE-2021-30860";
+        assert_info_message(
+            "{({l} {m}):10.10}",
+            zalgo,
+            "INFO m\u{0301}\u{0302}o\u{0303}\u{0304}\u{0305}\u{0306}re ".as_bytes(),
+        );
+        assert_info_message(
+            "{({l} {m}):24.24}",
+            zalgo,
+            "INFO m\u{0301}\u{0302}o\u{0303}\u{0304}\u{0305}\u{0306}re testing l\u{113}ss \u{1F1F7}\u{1F1F8}".as_bytes(),
+        );
+        assert_info_message(
+            "{({l} {m}):-24.24}",
+            zalgo,
+            "g l\u{113}ss \u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4} CVE-2021-30860".as_bytes(),
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn truncate_then_pad_formatter() {
+        assert_info_message("{({l} {m}):10.7}", "foobar!", b"INFO fo   ");
+        assert_info_message("{({l} {m}):>10.7}", "foobar!", b"   INFO fo");
+        assert_info_message("{({l} {m}):-10.7}", "foobar!", b"foobar!   ");
     }

     #[test]
diff --git a/src/encode/pattern/parser.rs b/src/encode/pattern/parser.rs
index 8e91e8ec..4e68172e 100644
--- a/src/encode/pattern/parser.rs
+++ b/src/encode/pattern/parser.rs
@@ -20,6 +20,7 @@ pub struct Formatter<'a> {
 #[derive(Clone, Eq, PartialEq, Hash, Debug)]
 pub struct Parameters {
     pub fill: char,
+    pub right_truncate: bool,
     pub align: Alignment,
     pub min_width: Option<usize>,
     pub max_width: Option<usize>,
@@ -123,6 +124,7 @@ impl<'a> Parser<'a> {
     fn parameters(&mut self) -> Parameters {
         let mut params = Parameters {
             fill: ' ',
+            right_truncate: true,
             align: Alignment::Left,
             min_width: None,
             max_width: None,
@@ -148,6 +150,10 @@ impl<'a> Parser<'a> {
             params.align = Alignment::Right;
         }

+        if self.consume('-') {
+            params.right_truncate = false;
+        }
+
         if let Some(min_width) = self.integer() {
             params.min_width = Some(min_width);
         }