diff --git a/Cargo.toml b/Cargo.toml
index 629999aa..b84bb72c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,6 +71,7 @@ parking_lot = { version = "0.12.0", optional = true }
thiserror = "1.0.15"
anyhow = "1.0.28"
derivative = "2.2"
+unicode-segmentation = "1.10.0"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", optional = true, features = ["handleapi", "minwindef", "processenv", "winbase", "wincon"] }
diff --git a/src/encode/pattern/mod.rs b/src/encode/pattern/mod.rs
index 5215f2ec..bfede0bc 100644
--- a/src/encode/pattern/mod.rs
+++ b/src/encode/pattern/mod.rs
@@ -12,11 +12,12 @@
//! name := identifier
//! argument := format_string
//!
-//! format_spec := [ [ fill ] align ] [ min_width ] [ '.' max_width ]
+//! format_spec := [ [ fill ] align ] [ left_truncate ] [ min_width ] [ '.' max_width ]
//! fill := character
//! align := '<' | '>'
//! min_width := number
//! max_width := number
+//! left_truncate := '-'
//! ```
//!
//! # Special characters
@@ -101,6 +102,10 @@
//! configured. Any output over the maximum length will be truncated, and
//! output under the minimum length will be padded (see above).
//!
+//! Truncation will cut the right end of the contents, unless left truncation
+//! is specified (with a minus sign). Left/right truncation and left/right
+//! alignment are specified independently.
+//!
//! # Examples
//!
//! The default pattern is `{d} {l} {t} - {m}{n}` which produces output like
@@ -119,12 +124,17 @@
//! <code>INFO hello&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</code>, while the message `hello, world!` and log
//! level `DEBUG` will be truncated to `DEBUG hello, wo`.
//!
+//! The pattern `{({l} {m}):-15.15}` will behave as above, except the truncation
+//! will be from the left. For example, at `DEBUG` level, and a message of
+//! `hello, world!`, the output will be: `G hello, world!`
+//!
//! [MDC]: https://crates.io/crates/log-mdc
use chrono::{Local, Utc};
use derivative::Derivative;
use log::{Level, Record};
-use std::{default::Default, io, process, thread};
+use std::{default::Default, io, mem, process, thread};
+use unicode_segmentation::{GraphemeCursor, UnicodeSegmentation};
use crate::encode::{
self,
@@ -150,157 +160,6 @@ pub struct PatternEncoderConfig {
pattern: Option,
}
-fn is_char_boundary(b: u8) -> bool {
- b as i8 >= -0x40
-}
-
-fn char_starts(buf: &[u8]) -> usize {
- buf.iter().filter(|&&b| is_char_boundary(b)).count()
-}
-
-struct MaxWidthWriter<'a> {
- remaining: usize,
- w: &'a mut dyn encode::Write,
-}
-
-impl<'a> io::Write for MaxWidthWriter<'a> {
- fn write(&mut self, buf: &[u8]) -> io::Result {
- let mut remaining = self.remaining;
- let mut end = buf.len();
- for (idx, _) in buf
- .iter()
- .enumerate()
- .filter(|&(_, &b)| is_char_boundary(b))
- {
- if remaining == 0 {
- end = idx;
- break;
- }
- remaining -= 1;
- }
-
- // we don't want to report EOF, so just act as a sink past this point
- if end == 0 {
- return Ok(buf.len());
- }
-
- let buf = &buf[..end];
- match self.w.write(buf) {
- Ok(len) => {
- if len == end {
- self.remaining = remaining;
- } else {
- self.remaining -= char_starts(&buf[..len]);
- }
- Ok(len)
- }
- Err(e) => Err(e),
- }
- }
-
- fn flush(&mut self) -> io::Result<()> {
- self.w.flush()
- }
-}
-
-impl<'a> encode::Write for MaxWidthWriter<'a> {
- fn set_style(&mut self, style: &Style) -> io::Result<()> {
- self.w.set_style(style)
- }
-}
-
-struct LeftAlignWriter {
- to_fill: usize,
- fill: char,
- w: W,
-}
-
-impl LeftAlignWriter {
- fn finish(mut self) -> io::Result<()> {
- for _ in 0..self.to_fill {
- write!(self.w, "{}", self.fill)?;
- }
- Ok(())
- }
-}
-
-impl io::Write for LeftAlignWriter {
- fn write(&mut self, buf: &[u8]) -> io::Result {
- match self.w.write(buf) {
- Ok(len) => {
- self.to_fill = self.to_fill.saturating_sub(char_starts(&buf[..len]));
- Ok(len)
- }
- Err(e) => Err(e),
- }
- }
-
- fn flush(&mut self) -> io::Result<()> {
- self.w.flush()
- }
-}
-
-impl encode::Write for LeftAlignWriter {
- fn set_style(&mut self, style: &Style) -> io::Result<()> {
- self.w.set_style(style)
- }
-}
-
-enum BufferedOutput {
- Data(Vec),
- Style(Style),
-}
-
-struct RightAlignWriter {
- to_fill: usize,
- fill: char,
- w: W,
- buf: Vec,
-}
-
-impl RightAlignWriter {
- fn finish(mut self) -> io::Result<()> {
- for _ in 0..self.to_fill {
- write!(self.w, "{}", self.fill)?;
- }
- for out in self.buf {
- match out {
- BufferedOutput::Data(ref buf) => self.w.write_all(buf)?,
- BufferedOutput::Style(ref style) => self.w.set_style(style)?,
- }
- }
- Ok(())
- }
-}
-
-impl io::Write for RightAlignWriter {
- fn write(&mut self, buf: &[u8]) -> io::Result {
- self.to_fill = self.to_fill.saturating_sub(char_starts(buf));
-
- let mut pushed = false;
- if let Some(&mut BufferedOutput::Data(ref mut data)) = self.buf.last_mut() {
- data.extend_from_slice(buf);
- pushed = true;
- };
-
- if !pushed {
- self.buf.push(BufferedOutput::Data(buf.to_owned()));
- }
- Ok(buf.len())
- }
-
- fn flush(&mut self) -> io::Result<()> {
- Ok(())
- }
-}
-
-impl encode::Write for RightAlignWriter {
- fn set_style(&mut self, style: &Style) -> io::Result<()> {
- self.buf.push(BufferedOutput::Style(style.clone()));
- Ok(())
- }
-}
-
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
enum Chunk {
Text(String),
@@ -318,58 +177,12 @@ impl Chunk {
Chunk::Formatted {
ref chunk,
ref params,
- } => match (params.min_width, params.max_width, params.align) {
- (None, None, _) => chunk.encode(w, record),
- (None, Some(max_width), _) => {
- let mut w = MaxWidthWriter {
- remaining: max_width,
- w,
- };
- chunk.encode(&mut w, record)
- }
- (Some(min_width), None, Alignment::Left) => {
- let mut w = LeftAlignWriter {
- to_fill: min_width,
- fill: params.fill,
- w,
- };
- chunk.encode(&mut w, record)?;
- w.finish()
- }
- (Some(min_width), None, Alignment::Right) => {
- let mut w = RightAlignWriter {
- to_fill: min_width,
- fill: params.fill,
- w,
- buf: vec![],
- };
- chunk.encode(&mut w, record)?;
- w.finish()
- }
- (Some(min_width), Some(max_width), Alignment::Left) => {
- let mut w = LeftAlignWriter {
- to_fill: min_width,
- fill: params.fill,
- w: MaxWidthWriter {
- remaining: max_width,
- w,
- },
- };
- chunk.encode(&mut w, record)?;
- w.finish()
- }
- (Some(min_width), Some(max_width), Alignment::Right) => {
- let mut w = RightAlignWriter {
- to_fill: min_width,
- fill: params.fill,
- w: MaxWidthWriter {
- remaining: max_width,
- w,
- },
- buf: vec![],
- };
+ } => match (params.min_width, params.max_width) {
+ (None, None) => chunk.encode(w, record),
+ _ => {
+ let mut w = StringBasedWriter::new(w, params);
chunk.encode(&mut w, record)?;
- w.finish()
+ w.chunk_end()
}
},
Chunk::Error(ref s) => write!(w, "{{ERROR: {}}}", s),
@@ -560,6 +373,181 @@ impl<'a> From> for Chunk {
}
}
+/// One buffered piece of a formatted chunk: either a run of text or a style
+/// change. Pieces are stored in the order they were produced so they can be
+/// written out in that same order once the chunk is complete.
+enum StringOrStyle {
+ /// A run of text; `glen` is its length in graphemes, computed when the run
+ /// was buffered.
+ String { glen: usize, s: String }, // glen is the length in graphemes, not bytes
+ /// A style change recorded by `set_style`.
+ Style(Style),
+}
+
+/// Writer that buffers a whole formatted chunk before emitting it.
+///
+/// Text and style changes are collected first; `chunk_end` then measures the
+/// text in graphemes and writes it to `w`, padded or truncated according to
+/// `params`.
+struct StringBasedWriter<'writer, 'params> {
+    /// Raw bytes written since the last text run was closed.
+    buf: Vec<u8>,
+    /// Closed text runs and recorded style changes, in output order.
+    strings_and_styles: Vec<StringOrStyle>,
+    /// Destination that receives the final output.
+    w: &'writer mut dyn encode::Write,
+    /// Fill, alignment, width and truncation settings for this chunk.
+    params: &'params Parameters,
+}
+
+impl encode::Write for StringBasedWriter<'_, '_> {
+    /// Records `style` for later replay instead of applying it right away.
+    ///
+    /// Always succeeds: nothing is written to the underlying writer here.
+    fn set_style(&mut self, style: &Style) -> io::Result<()> {
+        // Close the pending text run first so the style is ordered after it.
+        self.push_string();
+        let entry = StringOrStyle::Style(style.clone());
+        self.strings_and_styles.push(entry);
+        Ok(())
+    }
+}
+
+impl io::Write for StringBasedWriter<'_, '_> {
+    /// Appends `buf` to the pending text run and reports it as fully written.
+    ///
+    /// Nothing reaches the underlying writer until `chunk_end` is called.
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        self.buf.extend_from_slice(buf);
+        Ok(buf.len())
+    }
+
+    /// No-op: output is only produced by `chunk_end`, so there is nothing to
+    /// flush at this level.
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl<'writer, 'params> StringBasedWriter<'writer, 'params> {
+    /// Creates an empty writer that will emit to `w` according to `params`.
+    fn new(w: &'writer mut dyn encode::Write, params: &'params Parameters) -> Self {
+        StringBasedWriter {
+            buf: Vec::new(),
+            strings_and_styles: Vec::new(),
+            w,
+            params,
+        }
+    }
+
+    /// Closes the pending text run: moves the bytes in `buf` into
+    /// `strings_and_styles` together with their grapheme count.
+    ///
+    /// Does nothing when no bytes are pending, so empty runs are never stored.
+    fn push_string(&mut self) {
+        if self.buf.is_empty() {
+            return;
+        }
+        let old_buf = mem::take(&mut self.buf);
+        // Invalid UTF-8 is replaced rather than rejected, so the stored string
+        // is always valid and can be segmented into graphemes.
+        let s = String::from_utf8_lossy(&old_buf).into_owned();
+        let glen = s.graphemes(true).count();
+        self.strings_and_styles
+            .push(StringOrStyle::String { glen, s });
+    }
+
+    /// Writes the buffered chunk to the underlying writer, truncated or padded
+    /// as requested by the parameters.
+    ///
+    /// Exactly one of three outputs is produced:
+    /// * wider than `max_width`: the content is truncated (from the right, or
+    ///   from the left when left truncation was requested);
+    /// * otherwise, narrower than `min_width`: the content is padded with the
+    ///   fill character on the side opposite to the alignment;
+    /// * otherwise: the content is written unchanged.
+    ///
+    /// Truncation takes precedence, so a contradictory specification with
+    /// `min_width > max_width` can never emit the content twice.
+    ///
+    /// # Errors
+    ///
+    /// Returns any error reported by the underlying writer.
+    fn chunk_end(&mut self) -> io::Result<()> {
+        self.push_string();
+        let total_width = self.compute_width();
+
+        if let Some(max_width) = self.params.max_width {
+            if total_width > max_width {
+                return if self.params.right_truncate {
+                    self.output_right_truncate(max_width)
+                } else {
+                    self.output_left_truncate(total_width, max_width)
+                };
+            }
+        }
+
+        if let Some(min_width) = self.params.min_width {
+            if total_width < min_width {
+                let padding = min_width - total_width;
+                return if self.params.align == Alignment::Left {
+                    self.output_everything()?;
+                    self.output_padding(padding)
+                } else {
+                    self.output_padding(padding)?;
+                    self.output_everything()
+                };
+            }
+        }
+
+        // Between the minimum and maximum width: emit unchanged.
+        self.output_everything()
+    }
+
+    /// Returns the total width of the buffered text in graphemes; style
+    /// entries contribute nothing.
+    fn compute_width(&self) -> usize {
+        self.strings_and_styles
+            .iter()
+            .map(|x| match x {
+                StringOrStyle::String { glen, .. } => *glen,
+                StringOrStyle::Style(_) => 0,
+            })
+            .sum()
+    }
+
+    /// Writes the last `max_width` graphemes of the buffered text.
+    ///
+    /// Every recorded style change is forwarded, including those that precede
+    /// the cut, so the styling state of the underlying writer ends up the same
+    /// as if nothing had been truncated.
+    fn output_left_truncate(&mut self, total_width: usize, max_width: usize) -> io::Result<()> {
+        // Number of graphemes still to drop from the front.
+        let mut to_cut = total_width - max_width;
+        for x in &self.strings_and_styles {
+            match x {
+                StringOrStyle::String { glen, s } => {
+                    if to_cut == 0 {
+                        self.w.write_all(s.as_bytes())?;
+                    } else if *glen <= to_cut {
+                        // The whole run falls inside the part being dropped.
+                        to_cut -= glen;
+                    } else {
+                        // The cut lands inside this run: keep its tail.
+                        let start = Self::boundary_or(s, to_cut, 0);
+                        self.w.write_all(&s.as_bytes()[start..])?;
+                        to_cut = 0;
+                    }
+                }
+                StringOrStyle::Style(style) => self.w.set_style(style)?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Returns the byte offset just past the first `count` graphemes of `s`.
+    ///
+    /// When `s` holds fewer than `count` graphemes, or segmentation fails,
+    /// `or` is returned instead; callers pass a value that makes them emit the
+    /// whole string rather than cut at a wrong position.
+    fn boundary_or(s: &str, count: usize, or: usize) -> usize {
+        let mut cursor = GraphemeCursor::new(0, s.len(), true);
+        let mut start = 0;
+        for _ in 0..count {
+            match cursor.next_boundary(s, 0) {
+                Ok(Some(boundary)) => start = boundary,
+                // Not expected for strings produced by `String::from_utf8_lossy`,
+                // but fall back conservatively instead of assuming so.
+                _ => return or,
+            }
+        }
+        start
+    }
+
+    /// Writes the first `max_width` graphemes of the buffered text.
+    ///
+    /// Text beyond the limit is dropped, but style changes recorded after the
+    /// cut are still forwarded; otherwise a trailing style change (for example
+    /// a reset) would be lost and the styling would leak into later output.
+    fn output_right_truncate(&mut self, mut max_width: usize) -> io::Result<()> {
+        for x in &self.strings_and_styles {
+            match x {
+                StringOrStyle::String { glen, s } => {
+                    if max_width == 0 {
+                        // Width budget is spent: skip text, keep replaying styles.
+                        continue;
+                    }
+                    if *glen <= max_width {
+                        self.w.write_all(s.as_bytes())?;
+                        max_width -= glen;
+                    } else {
+                        // The cut lands inside this run: keep its head.
+                        let end = Self::boundary_or(s, max_width, s.len());
+                        self.w.write_all(&s.as_bytes()[..end])?;
+                        max_width = 0;
+                    }
+                }
+                StringOrStyle::Style(style) => self.w.set_style(style)?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes every buffered text run and style change, in order, unmodified.
+    fn output_everything(&mut self) -> io::Result<()> {
+        for x in &self.strings_and_styles {
+            match x {
+                StringOrStyle::String { s, .. } => self.w.write_all(s.as_bytes())?,
+                StringOrStyle::Style(style) => self.w.set_style(style)?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes the fill character `len` times.
+    fn output_padding(&mut self, len: usize) -> io::Result<()> {
+        for _ in 0..len {
+            write!(self.w, "{}", self.params.fill)?;
+        }
+        Ok(())
+    }
+}
+
fn no_args(arg: &[Vec], params: Parameters, chunk: FormattedChunk) -> Chunk {
if arg.is_empty() {
Chunk::Formatted { chunk, params }
@@ -913,38 +901,82 @@ mod tests {
assert_eq!(buf, b"foobar");
}
- #[test]
+ /// Test helper: encodes a record at `INFO` level whose message is `msg`
+ /// using `pattern`, and asserts that the bytes written equal `expected`.
#[cfg(feature = "simple_writer")]
- fn left_align_formatter() {
- let pw = PatternEncoder::new("{({l} {m}):15}");
+ fn assert_info_message(pattern: &str, msg: &str, expected: &[u8]) {
+ let pw = PatternEncoder::new(pattern);
let mut buf = vec![];
pw.encode(
&mut SimpleWriter(&mut buf),
&Record::builder()
.level(Level::Info)
- .args(format_args!("foobar!"))
+ .args(format_args!("{}", msg))
.build(),
)
.unwrap();
- assert_eq!(buf, b"INFO foobar! ");
+ assert_eq!(buf, expected);
+ }
+
+    /// Left alignment (the default) pads on the right up to the minimum width.
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn left_align_formatter() {
+        // "INFO foobar!" is 12 characters wide, so a minimum width of 15
+        // appends three fill characters.
+        assert_info_message("{({l} {m}):15}", "foobar!", b"INFO foobar!   ");
+        // Content already wider than the minimum width is emitted unchanged.
+        assert_info_message("{({l} {m}):7}", "foobar!", b"INFO foobar!");
+    }
+
+    /// Default (right) truncation keeps the leading part of the content.
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn right_truncate_formatter() {
+        // (pattern, expected output) for the message "foobar!" at INFO level:
+        // over the maximum, exactly at the maximum, and between min and max.
+        let cases: [(&str, &[u8]); 3] = [
+            ("{({l} {m}):7.7}", &b"INFO fo"[..]),
+            ("{({l} {m}):12.12}", &b"INFO foobar!"[..]),
+            ("{({l} {m}):7.14}", &b"INFO foobar!"[..]),
+        ];
+        for &(pattern, expected) in cases.iter() {
+            assert_info_message(pattern, "foobar!", expected);
+        }
+    }
+
+    /// Left truncation (`-`) keeps the trailing part of the content.
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn left_truncate_formatter() {
+        // (pattern, expected output) for the message "foobar!" at INFO level:
+        // over the maximum, exactly at the maximum, and between min and max.
+        let cases: [(&str, &[u8]); 3] = [
+            ("{({l} {m}):-9.9}", &b"O foobar!"[..]),
+            ("{({l} {m}):-12.12}", &b"INFO foobar!"[..]),
+            ("{({l} {m}):-7.14}", &b"INFO foobar!"[..]),
+        ];
+        for &(pattern, expected) in cases.iter() {
+            assert_info_message(pattern, "foobar!", expected);
+        }
+    }
#[test]
#[cfg(feature = "simple_writer")]
fn right_align_formatter() {
- let pw = PatternEncoder::new("{({l} {m}):>15}");
+        // "INFO foobar!" is 12 characters wide, so a minimum width of 15
+        // prepends three fill characters when right-aligned.
+        assert_info_message("{({l} {m}):>15}", "foobar!", b"   INFO foobar!");
+        // Content at or above the minimum width is emitted unchanged.
+        assert_info_message("{({l} {m}):>12}", "foobar!", b"INFO foobar!");
+        assert_info_message("{({l} {m}):>7}", "foobar!", b"INFO foobar!");
+    }
- let mut buf = vec![];
- pw.encode(
- &mut SimpleWriter(&mut buf),
- &Record::builder()
- .level(Level::Info)
- .args(format_args!("foobar!"))
- .build(),
- )
- .unwrap();
- assert_eq!(buf, b" INFO foobar!");
+    /// Padding is computed in graphemes, not bytes or code points.
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn right_align_formatter_hard_unicode() {
+        // The message is two graphemes: U+01F5, and `g` followed by a combining
+        // acute accent. With "INFO " that is 7 graphemes, so a minimum width
+        // of 15 prepends eight fill characters.
+        assert_info_message(
+            "{({l} {m}):>15}",
+            "\u{01f5}\u{0067}\u{0301}",
+            "        INFO \u{01f5}\u{0067}\u{0301}".as_bytes(),
+        );
+    }
+
+    /// Truncation cuts on grapheme boundaries even with stacked combining
+    /// marks and regional-indicator (flag) pairs.
+    #[test]
+    #[cfg(feature = "simple_writer")]
+    fn zalgo_text() {
+        let zalgo = "m\u{0301}\u{0302}o\u{0303}\u{0304}\u{0305}\u{0306}re testing l\u{113}ss \u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4} CVE-2021-30860";
+        // (pattern, expected output): two right truncations, then one left truncation.
+        let cases: [(&str, &str); 3] = [
+            (
+                "{({l} {m}):10.10}",
+                "INFO m\u{0301}\u{0302}o\u{0303}\u{0304}\u{0305}\u{0306}re ",
+            ),
+            (
+                "{({l} {m}):24.24}",
+                "INFO m\u{0301}\u{0302}o\u{0303}\u{0304}\u{0305}\u{0306}re testing l\u{113}ss \u{1F1F7}\u{1F1F8}",
+            ),
+            (
+                "{({l} {m}):-24.24}",
+                "g l\u{113}ss \u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4} CVE-2021-30860",
+            ),
+        ];
+        for &(pattern, expected) in cases.iter() {
+            assert_info_message(pattern, zalgo, expected.as_bytes());
+        }
+    }
#[test]
diff --git a/src/encode/pattern/parser.rs b/src/encode/pattern/parser.rs
index 8e91e8ec..4e68172e 100644
--- a/src/encode/pattern/parser.rs
+++ b/src/encode/pattern/parser.rs
@@ -20,6 +20,7 @@ pub struct Formatter<'a> {
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub struct Parameters {
pub fill: char,
+ pub right_truncate: bool,
pub align: Alignment,
pub min_width: Option,
pub max_width: Option,
@@ -123,6 +124,7 @@ impl<'a> Parser<'a> {
fn parameters(&mut self) -> Parameters {
let mut params = Parameters {
fill: ' ',
+ right_truncate: true,
align: Alignment::Left,
min_width: None,
max_width: None,
@@ -148,6 +150,10 @@ impl<'a> Parser<'a> {
params.align = Alignment::Right;
}
+ if self.consume('-') {
+ params.right_truncate = false;
+ }
+
if let Some(min_width) = self.integer() {
params.min_width = Some(min_width);
}