From ca04e66cf18b4a8695e01d1437bc615121407eb2 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Wed, 28 Feb 2024 21:34:56 -0800 Subject: [PATCH 1/4] Fast lane now stores field starts to simplify the output function --- src/fast_lane.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/fast_lane.rs b/src/fast_lane.rs index 709a71c..cd52b9b 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -45,10 +45,13 @@ fn cut_str_fast_lane( fields.clear(); + // fields is going to hold at what index each field starts + fields.push(0); + for i in memchr::memchr_iter(opt.delimiter, buffer) { curr_field += 1; - fields.push(i); + fields.push(i + 1); if Side::Some(curr_field) == last_interesting_field { // We have no use for any other fields in this line @@ -64,13 +67,16 @@ fn cut_str_fast_lane( if Side::Some(curr_field) != last_interesting_field { // We reached the end of the line. Who knows, maybe // the user is interested in this field too. - fields.push(buffer.len()); + + // We add a fake start (+1 because we account for the fake delimiter) + // This allows us to avoid one if/else in the output loop + fields.push(buffer.len() + 1); } - let num_fields = fields.len(); + let num_fields = fields.len() - 1; match num_fields { - 1 if bounds.len() == 1 && fields[0] == buffer.len() => { + 1 if bounds.len() == 1 && fields[1] == buffer.len() + 1 => { stdout.write_all(buffer)?; } _ => { @@ -103,17 +109,12 @@ fn output_parts( stdout: &mut W, opt: &FastOpt, ) -> Result<()> { - let r = b.try_into_range(fields.len()); + let r = b.try_into_range(fields.len() - 1); let output = if r.is_ok() { let r = r.unwrap(); - - let idx_start = if r.start == 0 { - 0 - } else { - fields[r.start - 1] + 1 - }; - let idx_end = fields[r.end - 1]; + let idx_start = fields[r.start]; + let idx_end = fields[r.end] - 1; &line[idx_start..idx_end] } else if b.fallback_oob.is_some() { b.fallback_oob.as_ref().unwrap() From 
6396b17a00253f621214c83c2c1559a21c75b840 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Wed, 28 Feb 2024 21:39:39 -0800 Subject: [PATCH 2/4] Add tests about ranges --- src/cut_str.rs | 13 +++++++++++++ src/fast_lane.rs | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/cut_str.rs b/src/cut_str.rs index 2b7b5ba..16b7157 100644 --- a/src/cut_str.rs +++ b/src/cut_str.rs @@ -677,6 +677,19 @@ mod tests { assert_eq!(output, b"a\n".as_slice()); } + #[test] + fn cut_str_it_cut_ranges() { + let mut opt = make_fields_opt(); + let (mut output, mut buffer1, mut buffer2) = make_cut_str_buffers(); + let eol = &[EOL::Newline as u8]; + + let line = b"a-b-c"; + opt.bounds = UserBoundsList::from_str("1,1:3").unwrap(); + + cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); + assert_eq!(output, b"aa-b-c\n".as_slice()); + } + #[cfg(feature = "regex")] #[test] fn cut_str_regex_it_cut_a_field() { diff --git a/src/fast_lane.rs b/src/fast_lane.rs index cd52b9b..5468acb 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -343,6 +343,24 @@ mod tests { assert_eq!(output, b"a\n".as_slice()); } + #[test] + fn cut_str_it_cut_ranges() { + let opt = make_fields_opt("1,1:3"); + let (mut output, mut fields) = make_cut_str_buffers(); + + let line = b"a-b-c"; + + cut_str_fast_lane( + line, + &opt, + &mut output, + &mut fields, + opt.bounds.last_interesting_field, + ) + .unwrap(); + assert_eq!(output, b"aa-b-c\n".as_slice()); + } + #[test] fn cut_str_it_cut_with_negative_indices() { // just one negative index From b4dde0ce1a2c95842dd8c2940208ef94caac7670 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Wed, 28 Feb 2024 21:43:23 -0800 Subject: [PATCH 3/4] Support \t while formatting fields --- CHANGELOG.md | 2 ++ src/bounds.rs | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9497d8e..39b37cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ The 
format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - feat: add the ability to display fallback output when a field is out of bound (you can set it per-field using `-f =somefallback` or by providing a generic fallback using `--fallback-oob somefallback`) +- feat: it is now possible to type \t while formatting fields and + output a TAB (as we similarly do for \n) e.g. `-f '{1}\t{2}'` - feat: --characters now depends on the (default) regex feature - feat: help and short help are colored, as long as output is a tty and unless env var TERM=dumb or NO_COLOR (any value) is set diff --git a/src/bounds.rs b/src/bounds.rs index 67040fc..2a164fe 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -56,7 +56,8 @@ pub fn parse_bounds_list(s: &str) -> Result> { s[part_start..idx] .replace("{{", "{") .replace("}}", "}") - .replace("\\n", "\n"), + .replace("\\n", "\n") + .replace("\\t", "\t"), )); } @@ -81,7 +82,8 @@ pub fn parse_bounds_list(s: &str) -> Result> { s[part_start..] .replace("{{", "{") .replace("}}", "}") - .replace("\\n", "\n"), + .replace("\\n", "\n") + .replace("\\t", "\t"), )); } @@ -890,6 +892,16 @@ mod tests { BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))) ], ); + + assert_eq!( + parse_bounds_list("\\n\\t{{}}{1,2}\\n\\t{{}}").unwrap(), + vec![ + BoundOrFiller::Filler(String::from("\n\t{}")), + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), + BoundOrFiller::Filler(String::from("\n\t{}")), + ], + ); } #[test] From b43fdb925154811e944fb99e6087e609c0c07d6c Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Thu, 29 Feb 2024 22:16:33 -0800 Subject: [PATCH 4/4] Refactor Filler to use bytes --- src/bounds.rs | 22 ++++++++++++---------- src/cut_bytes.rs | 2 +- src/cut_lines.rs | 2 +- src/fast_lane.rs | 4 ++-- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/bounds.rs b/src/bounds.rs index 2a164fe..703548f 
100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -16,7 +16,7 @@ pub enum BoundsType { #[derive(Clone, Debug, PartialEq)] pub enum BoundOrFiller { Bound(UserBounds), - Filler(String), + Filler(Vec), } /** @@ -57,7 +57,8 @@ pub fn parse_bounds_list(s: &str) -> Result> { .replace("{{", "{") .replace("}}", "}") .replace("\\n", "\n") - .replace("\\t", "\t"), + .replace("\\t", "\t") + .into_bytes(), )); } @@ -83,7 +84,8 @@ pub fn parse_bounds_list(s: &str) -> Result> { .replace("{{", "{") .replace("}}", "}") .replace("\\n", "\n") - .replace("\\t", "\t"), + .replace("\\t", "\t") + .into_bytes(), )); } @@ -877,10 +879,10 @@ mod tests { assert_eq!( parse_bounds_list("hello {1,2} {{world}}").unwrap(), vec![ - BoundOrFiller::Filler(String::from("hello ")), + BoundOrFiller::Filler("hello ".into()), BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), - BoundOrFiller::Filler(String::from(" {world}")), + BoundOrFiller::Filler(" {world}".into()), ], ); @@ -888,7 +890,7 @@ mod tests { parse_bounds_list("{1}😎{2}").unwrap(), vec![ BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), - BoundOrFiller::Filler(String::from("😎")), + BoundOrFiller::Filler("😎".into()), BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))) ], ); @@ -896,10 +898,10 @@ mod tests { assert_eq!( parse_bounds_list("\\n\\t{{}}{1,2}\\n\\t{{}}").unwrap(), vec![ - BoundOrFiller::Filler(String::from("\n\t{}")), + BoundOrFiller::Filler("\n\t{}".into()), BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), - BoundOrFiller::Filler(String::from("\n\t{}")), + BoundOrFiller::Filler("\n\t{}".into()), ], ); } @@ -1051,10 +1053,10 @@ mod tests { assert_eq!( UserBoundsList::from_str("a{1:2}b").unwrap().unpack(4).list, vec![ - BoundOrFiller::Filler(String::from("a")), + BoundOrFiller::Filler("a".into()), 
BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), - BoundOrFiller::Filler(String::from("b")), + BoundOrFiller::Filler("b".into()), ] ); } diff --git a/src/cut_bytes.rs b/src/cut_bytes.rs index c856128..c144a83 100644 --- a/src/cut_bytes.rs +++ b/src/cut_bytes.rs @@ -16,7 +16,7 @@ fn cut_bytes(data: &[u8], opt: &Opt, stdout: &mut W) -> Result<()> { let r = b.try_into_range(data.len())?; &data[r.start..r.end] } - BoundOrFiller::Filler(f) => f.as_bytes(), + BoundOrFiller::Filler(f) => f, }; stdout.write_all(output)?; diff --git a/src/cut_lines.rs b/src/cut_lines.rs index 86d2e76..e797dc4 100644 --- a/src/cut_lines.rs +++ b/src/cut_lines.rs @@ -31,7 +31,7 @@ fn cut_lines_forward_only( let b = match bof { BoundOrFiller::Filler(f) => { - stdout.write_all(f.as_bytes())?; + stdout.write_all(f)?; bounds_idx += 1; if opt.join && bounds_idx != opt.bounds.len() { diff --git a/src/fast_lane.rs b/src/fast_lane.rs index 5468acb..8e3dc6a 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -83,7 +83,7 @@ fn cut_str_fast_lane( bounds.iter().try_for_each(|bof| -> Result<()> { match bof { BoundOrFiller::Filler(f) => { - stdout.write_all(f.as_bytes())?; + stdout.write_all(f)?; } BoundOrFiller::Bound(b) => { output_parts(buffer, b, fields, stdout, opt)?; @@ -166,7 +166,7 @@ impl<'a> TryFrom<&'a Opt> for FastOpt<'a> { ); } - let delimiter = value.delimiter.as_bytes().first().unwrap().to_owned(); + let delimiter: u8 = *value.delimiter.as_bytes().first().unwrap(); Ok(FastOpt { delimiter, join: value.join,