From 9eaafa5f91b737bb5958d46f9647aa11104c6f25 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 19 Sep 2024 12:06:03 -0700 Subject: [PATCH] Treat newlines as width 0 in the 0.1 stream, publish 0.1.14 (#67) * Treat newlines as width 0 * Publish 0.1.14 --- Cargo.toml | 2 +- scripts/unicode.py | 5 ++++- src/tables.rs | 10 ++++++++-- tests/tests.rs | 24 +++++++++++++++++++----- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ae464ee..c956c80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" authors = [ "kwantam ", "Manish Goregaokar ", diff --git a/scripts/unicode.py b/scripts/unicode.py index aa0d86b..320da14 100755 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -1281,7 +1281,10 @@ def lookup_fns( s += """ if c <= '\\u{A0}' { match c { - '\\n' => (1, WidthInfo::LINE_FEED), + // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering + // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates. + // https://github.com/unicode-rs/unicode-width/issues/60 + '\\n' => (0, WidthInfo::LINE_FEED), '\\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT), _ => (1, WidthInfo::DEFAULT), } diff --git a/src/tables.rs b/src/tables.rs index fa632d6..49ef606 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -215,7 +215,10 @@ fn width_in_str(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) { } if c <= '\u{A0}' { match c { - '\n' => (1, WidthInfo::LINE_FEED), + // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering + // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates. + // https://github.com/unicode-rs/unicode-width/issues/60 + '\n' => (0, WidthInfo::LINE_FEED), '\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT), _ => (1, WidthInfo::DEFAULT), } @@ -507,7 +510,10 @@ fn width_in_str_cjk(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) { } if c <= '\u{A0}' { match c { - '\n' => (1, WidthInfo::LINE_FEED), + // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering + // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates. + // https://github.com/unicode-rs/unicode-width/issues/60 + '\n' => (0, WidthInfo::LINE_FEED), '\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT), _ => (1, WidthInfo::DEFAULT), } diff --git a/tests/tests.rs b/tests/tests.rs index 2940df2..87c76e3 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -214,18 +214,23 @@ fn test_control_line_break() { assert_width!('\r', None, None); assert_width!('\n', None, None); assert_width!("\r", 1, 1); - assert_width!("\n", 1, 1); - assert_width!("\r\n", 1, 1); + // This is 0 due to #60 + assert_width!("\n", 0, 0); + assert_width!("\r\n", 0, 0); assert_width!("\0", 1, 1); - assert_width!("1\t2\r\n3\u{85}4", 7, 7); - assert_width!("\r\u{FE0F}\n", 2, 2); - assert_width!("\r\u{200D}\n", 2, 2); + assert_width!("1\t2\r\n3\u{85}4", 6, 6); + assert_width!("\r\u{FE0F}\n", 1, 1); + assert_width!("\r\u{200D}\n", 1, 1); } #[test] fn char_str_consistent() { let mut s = String::with_capacity(4); for c in '\0'..=char::MAX { + // Newlines are special cased (#60) + if c == '\n' { + continue; + } s.clear(); s.push(c); assert_eq!(c.width().unwrap_or(1), s.width()); @@ -418,6 +423,10 @@ fn test_khmer_coeng() { assert_width!(format!("\u{17D2}{c}"), 0, 0); assert_width!(format!("\u{17D2}\u{200D}\u{200D}{c}"), 0, 0); } else { + // Newlines are special cased (#60) + if c == '\n' { + continue; + } assert_width!( format!("\u{17D2}{c}"), c.width().unwrap_or(1), @@ -588,6 +597,11 @@ fn emoji_test_file() { } } +#[test] +fn test_newline_zero_issue_60() { + assert_width!("a\na", 2, 2); +} + // Test traits are unsealed #[cfg(feature = "cjk")]