From dc242ce2aab305ec34b94eb2a0a0ea717626dfc1 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Sun, 11 Feb 2024 23:47:02 -0800 Subject: [PATCH 1/8] Migrate get_last_bound into UserBoundsList --- src/bin/tuc.rs | 9 +--- src/bounds.rs | 86 ++++++++++++++++++++++++++++-------- src/cut_bytes.rs | 2 +- src/cut_lines.rs | 70 +++++++++++++----------------- src/fast_lane.rs | 111 ++++++++++------------------------------------- src/options.rs | 2 +- 6 files changed, 124 insertions(+), 156 deletions(-) diff --git a/src/bin/tuc.rs b/src/bin/tuc.rs index a38e58e..3e1f1ba 100644 --- a/src/bin/tuc.rs +++ b/src/bin/tuc.rs @@ -114,7 +114,7 @@ fn parse_args() -> Result { }; if bounds_type == BoundsType::Fields - && (maybe_fields.is_none() || maybe_fields.as_ref().unwrap().0.is_empty()) + && (maybe_fields.is_none() || maybe_fields.as_ref().unwrap().is_empty()) { eprintln!("tuc: invariant error. At this point we expected to find at least 1 field bound"); std::process::exit(1); @@ -209,12 +209,7 @@ fn parse_args() -> Result { .or(maybe_lines) .unwrap(); - if has_json - && bounds - .0 - .iter() - .any(|s| matches!(s, BoundOrFiller::Filler(_))) - { + if has_json && bounds.iter().any(|s| matches!(s, BoundOrFiller::Filler(_))) { eprintln!("tuc: runtime error. Cannot format fields when using --json"); std::process::exit(1); } diff --git a/src/bounds.rs b/src/bounds.rs index 6258889..3a20760 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -95,25 +95,62 @@ pub fn parse_bounds_list(s: &str) -> Result> { } } -#[derive(Debug)] -pub struct UserBoundsList(pub Vec); +#[derive(Debug, Clone)] +pub struct UserBoundsList { + pub list: Vec, + /// Optimization that we can use to stop searching for fields. + /// It's available only when every bound uses positive indexes. + /// When conditions do not apply, its value is `Side::Continue`. + last_interesting_field: Side, +} impl Deref for UserBoundsList { type Target = Vec; fn deref(&self) -> &Self::Target { - &self.0 + &self.list + } +} + +impl From> for UserBoundsList { + fn from(list: Vec) -> Self { + let mut ubl = UserBoundsList { + list, + last_interesting_field: Side::Continue, + }; + + let mut rightmost_bound: Option = None; + + // This is risky, we could end up using last_interesting_field + // internally. Couldn't figure out how to use is_sortable without + // major refactoring. + if ubl.is_sortable() { + ubl.list.iter().for_each(|bof| { + if let BoundOrFiller::Bound(b) = bof { + if rightmost_bound.is_none() || b.r > rightmost_bound.unwrap() { + rightmost_bound = Some(b.r); + } + } + }); + } + + ubl.last_interesting_field = rightmost_bound.unwrap_or(Side::Continue); + ubl } } impl FromStr for UserBoundsList { type Err = anyhow::Error; fn from_str(s: &str) -> Result { - Ok(UserBoundsList(parse_bounds_list(s)?)) + Ok(parse_bounds_list(s)?.into()) } } impl UserBoundsList { + pub fn new(list: Vec) -> Self { + list.into() + } + /// Detect whether the list can be sorted. /// It can be sorted only if every bound /// has the same sign (all positive or all negative). @@ -142,7 +179,7 @@ impl UserBoundsList { } fn get_userbounds_only(&self) -> impl Iterator + '_ { - self.0.iter().flat_map(|b| match b { + self.list.iter().flat_map(|b| match b { BoundOrFiller::Bound(x) => Some(x), _ => None, }) @@ -192,17 +229,22 @@ impl UserBoundsList { * and with every ranged bound converted into single slot bounds. */ pub fn unpack(&self, num_fields: usize) -> UserBoundsList { - UserBoundsList( - self.0 - .iter() - .filter_map(|x| match x { - BoundOrFiller::Filler(_) => None, - BoundOrFiller::Bound(b) => Some(b.unpack(num_fields)), - }) - .flatten() - .map(BoundOrFiller::Bound) - .collect(), - ) + let list: Vec = self + .list + .iter() + .filter_map(|x| match x { + BoundOrFiller::Filler(_) => None, + BoundOrFiller::Bound(b) => Some(b.unpack(num_fields)), + }) + .flatten() + .map(BoundOrFiller::Bound) + .collect(); + + list.into() + } + + pub fn get_last_bound(&self) -> Side { + self.last_interesting_field } } @@ -715,7 +757,7 @@ mod tests { #[test] fn test_user_bounds_is_sortable() { - assert!(UserBoundsList(Vec::new()).is_sortable()); + assert!(UserBoundsList::new(Vec::new()).is_sortable()); assert!(UserBoundsList::from_str("1").unwrap().is_sortable()); @@ -763,7 +805,10 @@ mod tests { #[test] fn test_vec_of_bounds_can_unpack() { assert_eq!( - UserBoundsList::from_str("1,:1,2:3,4:").unwrap().unpack(4).0, + UserBoundsList::from_str("1,:1,2:3,4:") + .unwrap() + .unpack(4) + .list, vec![ BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), @@ -774,7 +819,10 @@ mod tests { ); assert_eq!( - UserBoundsList::from_str("a{1}b{2}c").unwrap().unpack(4).0, + UserBoundsList::from_str("a{1}b{2}c") + .unwrap() + .unpack(4) + .list, vec![ BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), diff --git a/src/cut_bytes.rs b/src/cut_bytes.rs index a80f20d..a3d9d17 100644 --- a/src/cut_bytes.rs +++ b/src/cut_bytes.rs @@ -10,7 +10,7 @@ fn cut_bytes(data: &[u8], opt: &Opt, stdout: &mut W) -> Result<()> { return Ok(()); } - opt.bounds.0.iter().try_for_each(|bof| -> Result<()> { + opt.bounds.iter().try_for_each(|bof| -> Result<()> { let output = match bof { BoundOrFiller::Bound(b) => { let r = b.try_into_range(data.len())?; diff --git a/src/cut_lines.rs b/src/cut_lines.rs index 4a303c7..900c65b 100644 --- a/src/cut_lines.rs +++ b/src/cut_lines.rs @@ -147,40 +147,28 @@ mod tests { } } - const BOF_F1: BoundOrFiller = BoundOrFiller::Bound(UserBounds { - l: Side::Some(1), - r: Side::Some(1), - }); - - const BOF_F2: BoundOrFiller = BoundOrFiller::Bound(UserBounds { - l: Side::Some(2), - r: Side::Some(2), - }); - - const BOF_F3: BoundOrFiller = BoundOrFiller::Bound(UserBounds { - l: Side::Some(3), - r: Side::Some(3), - }); - - const BOF_R2_3: BoundOrFiller = BoundOrFiller::Bound(UserBounds { - l: Side::Some(2), - r: Side::Some(3), - }); - - const BOF_NEG1: BoundOrFiller = BoundOrFiller::Bound(UserBounds { - l: Side::Some(-1), - r: Side::Some(-1), - }); - - const BOF_F1_TO_END: BoundOrFiller = BoundOrFiller::Bound(UserBounds { - l: Side::Some(1), - r: Side::Continue, - }); + const BOF_F1: BoundOrFiller = + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))); + + const BOF_F2: BoundOrFiller = + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))); + + const BOF_F3: BoundOrFiller = + BoundOrFiller::Bound(UserBounds::new(Side::Some(3), Side::Some(3))); + + const BOF_R2_3: BoundOrFiller = + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(3))); + + const BOF_NEG1: BoundOrFiller = + BoundOrFiller::Bound(UserBounds::new(Side::Some(-1), Side::Some(-1))); + + const BOF_F1_TO_END: BoundOrFiller = + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Continue)); #[test] fn fwd_cut_one_field() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1]); + opt.bounds = UserBoundsList::new(vec![BOF_F1]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -191,7 +179,7 @@ mod tests { #[test] fn fwd_cut_multiple_fields() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1, BOF_F2]); + opt.bounds = UserBoundsList::new(vec![BOF_F1, BOF_F2]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -202,7 +190,7 @@ mod tests { #[test] fn fwd_support_ranges() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1, BOF_R2_3]); + opt.bounds = UserBoundsList::new(vec![BOF_F1, BOF_R2_3]); let mut input = b"a\nb\nc".as_slice(); let mut output = Vec::with_capacity(100); @@ -213,7 +201,7 @@ mod tests { #[test] fn fwd_supports_no_join() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1, BOF_F3]); + opt.bounds = UserBoundsList::new(vec![BOF_F1, BOF_F3]); opt.join = false; let mut input = b"a\nb\nc".as_slice(); @@ -225,7 +213,7 @@ mod tests { #[test] fn fwd_supports_no_right_bound() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1_TO_END]); + opt.bounds = UserBoundsList::new(vec![BOF_F1_TO_END]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -236,7 +224,7 @@ mod tests { #[test] fn fwd_handle_out_of_bounds() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F3]); + opt.bounds = UserBoundsList::new(vec![BOF_F3]); opt.join = true; let mut input = b"a\nb".as_slice(); @@ -248,7 +236,7 @@ mod tests { #[test] fn fwd_ignore_last_empty() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F3]); + opt.bounds = UserBoundsList::new(vec![BOF_F3]); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -266,7 +254,7 @@ mod tests { #[test] fn cut_lines_handle_negative_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_NEG1]); + opt.bounds = UserBoundsList::new(vec![BOF_NEG1]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -277,7 +265,7 @@ mod tests { #[test] fn cut_lines_ignore_last_empty_when_using_positive_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F3]); + opt.bounds = UserBoundsList::new(vec![BOF_F3]); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -295,7 +283,7 @@ mod tests { #[test] fn cut_lines_ignore_last_empty_when_using_negative_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_NEG1]); + opt.bounds = UserBoundsList::new(vec![BOF_NEG1]); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -313,7 +301,7 @@ mod tests { #[test] fn fwd_cut_zero_delimited() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1]); + opt.bounds = UserBoundsList::new(vec![BOF_F1]); opt.eol = EOL::Zero; opt.delimiter = String::from("\0"); @@ -326,7 +314,7 @@ mod tests { #[test] fn cut_lines_zero_delimited() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList(vec![BOF_F1]); + opt.bounds = UserBoundsList::new(vec![BOF_F1]); opt.eol = EOL::Zero; opt.delimiter = String::from("\0"); diff --git a/src/fast_lane.rs b/src/fast_lane.rs index db6b5c6..e7fbb8f 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -1,10 +1,9 @@ use crate::bounds::{BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList}; use crate::options::{Opt, Trim, EOL}; -use anyhow::{bail, Result}; +use anyhow::Result; use bstr::ByteSlice; use std::convert::TryFrom; use std::io::{self, BufRead}; -use std::ops::Deref; use std::str::FromStr; use std::{io::Write, ops::Range}; @@ -140,7 +139,7 @@ pub struct FastOpt { delimiter: u8, join: bool, eol: EOL, - bounds: ForwardBounds, + bounds: UserBoundsList, only_delimited: bool, trim: Option, } @@ -151,7 +150,7 @@ impl Default for FastOpt { delimiter: b'\t', join: false, eol: EOL::Newline, - bounds: ForwardBounds::try_from(&UserBoundsList::from_str("1:").unwrap()).unwrap(), + bounds: UserBoundsList::from_str("1:").unwrap(), only_delimited: false, trim: None, } @@ -179,77 +178,15 @@ impl TryFrom<&Opt> for FastOpt { ); } - if let Ok(forward_bounds) = ForwardBounds::try_from(&value.bounds) { - Ok(FastOpt { - delimiter: value.delimiter.as_bytes().first().unwrap().to_owned(), - join: value.join, - eol: value.eol, - bounds: forward_bounds, - only_delimited: value.only_delimited, - trim: value.trim, - }) - } else { - Err("Bounds cannot be converted to ForwardBounds") - } - } -} - -#[derive(Debug)] -struct ForwardBounds { - pub list: UserBoundsList, - // Optimization that we can use to stop searching for fields - // It's available only when every bound use positive indexes. - // When conditions do not apply, Side::Continue is used. - last_interesting_field: Side, -} - -impl TryFrom<&UserBoundsList> for ForwardBounds { - type Error = anyhow::Error; - - fn try_from(value: &UserBoundsList) -> Result { - if value.is_empty() { - bail!("Cannot create ForwardBounds from an empty UserBoundsList"); - } else { - let value: UserBoundsList = UserBoundsList(value.iter().cloned().collect()); - - let mut rightmost_bound: Option = None; - if value.is_sortable() { - value.iter().for_each(|bof| { - if let BoundOrFiller::Bound(b) = bof { - if rightmost_bound.is_none() || b.r > rightmost_bound.unwrap() { - rightmost_bound = Some(b.r); - } - } - }); - } - - Ok(ForwardBounds { - list: value, - last_interesting_field: rightmost_bound.unwrap_or(Side::Continue), - }) - } - } -} - -impl Deref for ForwardBounds { - type Target = UserBoundsList; - - fn deref(&self) -> &Self::Target { - &self.list - } -} - -impl ForwardBounds { - fn get_last_bound(&self) -> Side { - self.last_interesting_field - } -} - -impl FromStr for ForwardBounds { - type Err = anyhow::Error; - fn from_str(s: &str) -> Result { - let bounds_list = UserBoundsList::from_str(s)?; - ForwardBounds::try_from(&bounds_list) + let delimiter = value.delimiter.as_bytes().first().unwrap().to_owned(); + Ok(FastOpt { + delimiter, + join: value.join, + eol: value.eol, + bounds: value.bounds.clone(), + only_delimited: value.only_delimited, + trim: value.trim, + }) } } @@ -397,7 +334,7 @@ mod tests { let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = ForwardBounds::from_str("1").unwrap(); + opt.bounds = UserBoundsList::from_str("1").unwrap(); cut_str_fast_lane( line, @@ -417,7 +354,7 @@ mod tests { let line = b"a-b-c"; // just one negative index - opt.bounds = ForwardBounds::from_str("-1").unwrap(); + opt.bounds = UserBoundsList::from_str("-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -430,7 +367,7 @@ mod tests { assert_eq!(output, b"c\n".as_slice()); // multiple negative indices, in forward order - opt.bounds = ForwardBounds::from_str("-2,-1").unwrap(); + opt.bounds = UserBoundsList::from_str("-2,-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -443,7 +380,7 @@ mod tests { assert_eq!(output, b"bc\n".as_slice()); // multiple negative indices, in non-forward order - opt.bounds = ForwardBounds::from_str("-1,-2").unwrap(); + opt.bounds = UserBoundsList::from_str("-1,-2").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -458,7 +395,7 @@ mod tests { // mix positive and negative indices // (this is particularly useful to verify that we don't screw // up optimizations on last field to check) - opt.bounds = ForwardBounds::from_str("-1,1").unwrap(); + opt.bounds = UserBoundsList::from_str("-1,1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -477,7 +414,7 @@ mod tests { let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = ForwardBounds::from_str("1,3").unwrap(); + opt.bounds = UserBoundsList::from_str("1,3").unwrap(); cut_str_fast_lane( line, @@ -497,7 +434,7 @@ mod tests { opt.eol = EOL::Zero; let line = b"a-b-c"; - opt.bounds = ForwardBounds::from_str("2").unwrap(); + opt.bounds = UserBoundsList::from_str("2").unwrap(); cut_str_fast_lane( line, @@ -516,7 +453,7 @@ mod tests { let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = ForwardBounds::from_str("1,3").unwrap(); + opt.bounds = UserBoundsList::from_str("1,3").unwrap(); opt.join = true; cut_str_fast_lane( @@ -536,7 +473,7 @@ mod tests { let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = ForwardBounds::from_str("{1} < {3} > {2}").unwrap(); + opt.bounds = UserBoundsList::from_str("{1} < {3} > {2}").unwrap(); cut_str_fast_lane( line, @@ -556,7 +493,7 @@ mod tests { // check Trim::Both opt.trim = Some(Trim::Both); - opt.bounds = ForwardBounds::from_str("1,3,-1").unwrap(); + opt.bounds = UserBoundsList::from_str("1,3,-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( @@ -571,7 +508,7 @@ mod tests { // check Trim::Left opt.trim = Some(Trim::Left); - opt.bounds = ForwardBounds::from_str("1,3,-3").unwrap(); + opt.bounds = UserBoundsList::from_str("1,3,-3").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( @@ -586,7 +523,7 @@ mod tests { // check Trim::Right opt.trim = Some(Trim::Right); - opt.bounds = ForwardBounds::from_str("3,5,-1").unwrap(); + opt.bounds = UserBoundsList::from_str("3,5,-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( diff --git a/src/options.rs b/src/options.rs index d4eefbf..603b10c 100644 --- a/src/options.rs +++ b/src/options.rs @@ -54,7 +54,7 @@ impl Default for Opt { Opt { delimiter: String::from("-"), eol: EOL::Newline, - bounds: UserBoundsList(vec![BoundOrFiller::Bound(UserBounds::default())]), + bounds: UserBoundsList::new(vec![BoundOrFiller::Bound(UserBounds::default())]), bounds_type: BoundsType::Fields, only_delimited: false, greedy_delimiter: false, From c0cbb63603af7e479be094a8c733f8cb491b9479 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Sun, 11 Feb 2024 23:50:11 -0800 Subject: [PATCH 2/8] Expose directly last_interesting_field --- src/bounds.rs | 10 +++------- src/fast_lane.rs | 34 +++++++++++++++++----------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/bounds.rs b/src/bounds.rs index 3a20760..5f3a715 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -101,7 +101,7 @@ pub struct UserBoundsList { /// Optimization that we can use to stop searching for fields. /// It's available only when every bound uses positive indexes. /// When conditions do not apply, its value is `Side::Continue`. - last_interesting_field: Side, + pub last_interesting_field: Side, } impl Deref for UserBoundsList { @@ -122,8 +122,8 @@ impl From> for UserBoundsList { let mut rightmost_bound: Option = None; // This is risky, we could end up using last_interesting_field - // internally. Couldn't figure out how to use is_sortable without - // major refactoring. + // internally. Didn't spend much time to figure out how to use + // is_sortable without major refactoring. if ubl.is_sortable() { ubl.list.iter().for_each(|bof| { if let BoundOrFiller::Bound(b) = bof { @@ -242,10 +242,6 @@ impl UserBoundsList { list.into() } - - pub fn get_last_bound(&self) -> Side { - self.last_interesting_field - } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/fast_lane.rs b/src/fast_lane.rs index e7fbb8f..a93527e 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -197,7 +197,7 @@ pub fn read_and_cut_text_as_bytes( ) -> Result<()> { let mut fields: Vec> = Vec::with_capacity(16); - let last_interesting_field = opt.bounds.get_last_bound(); + let last_interesting_field = opt.bounds.last_interesting_field; match opt.eol { EOL::Newline => stdin.for_byte_line(|line| { @@ -276,7 +276,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"foo\n".as_slice()); @@ -289,7 +289,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"\n".as_slice()); @@ -309,7 +309,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"".as_slice()); @@ -322,7 +322,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"".as_slice()); @@ -341,7 +341,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"a\n".as_slice()); @@ -361,7 +361,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"c\n".as_slice()); @@ -374,7 +374,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"bc\n".as_slice()); @@ -387,7 +387,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"cb\n".as_slice()); @@ -402,7 +402,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"ca\n".as_slice()); @@ -421,7 +421,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"ac\n".as_slice()); @@ -441,7 +441,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"b\0".as_slice()); @@ -461,7 +461,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"a-c\n".as_slice()); @@ -480,7 +480,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"a < c > b\n".as_slice()); @@ -501,7 +501,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"abc\n".as_slice()); @@ -516,7 +516,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"abc\n".as_slice()); @@ -531,7 +531,7 @@ mod tests { &opt, &mut output, &mut fields, - opt.bounds.get_last_bound(), + opt.bounds.last_interesting_field, ) .unwrap(); assert_eq!(output, b"abc\n".as_slice()); From f72f858f0f51edb56c85a7f8a213454e8b9e3775 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Tue, 13 Feb 2024 00:26:29 -0800 Subject: [PATCH 3/8] Create UserBoundsTrait --- src/bounds.rs | 25 ++++++++++++-------- src/cut_bytes.rs | 2 +- src/cut_lines.rs | 61 ++++++++++++++++++++++++------------------------ src/cut_str.rs | 2 +- src/fast_lane.rs | 2 +- 5 files changed, 49 insertions(+), 43 deletions(-) diff --git a/src/bounds.rs b/src/bounds.rs index 5f3a715..2e8d946 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -352,8 +352,15 @@ impl FromStr for UserBounds { } } -impl UserBounds { - pub fn new(l: Side, r: Side) -> Self { +pub trait UserBoundsTrait { + fn new(l: Side, r: Side) -> Self; + fn try_into_range(&self, parts_length: usize) -> Result>; + fn matches(&self, idx: T) -> Result; + fn unpack(&self, num_fields: usize) -> Vec; +} + +impl UserBoundsTrait for UserBounds { + fn new(l: Side, r: Side) -> Self { UserBounds { l, r } } /** @@ -366,7 +373,7 @@ impl UserBounds { * Fields are 1-indexed. */ #[inline(always)] - pub fn matches(&self, idx: i32) -> Result { + fn matches(&self, idx: i32) -> Result { match (self.l, self.r) { (Side::Some(left), _) if (left * idx).is_negative() => { bail!( @@ -401,7 +408,7 @@ impl UserBounds { /// e.g. /// /// ```rust - /// # use tuc::bounds::UserBounds; + /// # use tuc::bounds::{UserBounds, UserBoundsTrait}; /// # use std::ops::Range; /// # use tuc::bounds::Side; /// @@ -415,7 +422,7 @@ impl UserBounds { /// Range { start: 0, end: 5} /// ); /// ``` - pub fn try_into_range(&self, parts_length: usize) -> Result> { + fn try_into_range(&self, parts_length: usize) -> Result> { let start: usize = match self.l { Side::Continue => 0, Side::Some(v) => { @@ -452,11 +459,9 @@ impl UserBounds { Ok(Range { start, end }) } - /** - * Transform a ranged bound into a list of one or more - * 1 slot bound - */ - pub fn unpack(&self, num_fields: usize) -> Vec { + /// Transform a ranged bound into a list of one or more + /// slot bound + fn unpack(&self, num_fields: usize) -> Vec { let mut bounds = Vec::new(); let n: i32 = num_fields .try_into() diff --git a/src/cut_bytes.rs b/src/cut_bytes.rs index a3d9d17..c856128 100644 --- a/src/cut_bytes.rs +++ b/src/cut_bytes.rs @@ -1,7 +1,7 @@ use anyhow::Result; use std::io::{Read, Write}; -use crate::bounds::BoundOrFiller; +use crate::bounds::{BoundOrFiller, UserBoundsTrait}; use crate::options::Opt; use crate::read_utils::read_bytes_to_end; diff --git a/src/cut_lines.rs b/src/cut_lines.rs index 900c65b..ffe9f0d 100644 --- a/src/cut_lines.rs +++ b/src/cut_lines.rs @@ -2,7 +2,7 @@ use anyhow::{bail, Result}; use std::io::{BufRead, Write}; use std::ops::Range; -use crate::bounds::{BoundOrFiller, Side}; +use crate::bounds::{BoundOrFiller, Side, UserBoundsTrait}; use crate::cut_str::cut_str; use crate::options::Opt; use crate::read_utils::read_line_with_eol; @@ -147,28 +147,29 @@ mod tests { } } - const BOF_F1: BoundOrFiller = - BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))); - - const BOF_F2: BoundOrFiller = - BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))); - - const BOF_F3: BoundOrFiller = - BoundOrFiller::Bound(UserBounds::new(Side::Some(3), Side::Some(3))); - - const BOF_R2_3: BoundOrFiller = - BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(3))); - - const BOF_NEG1: BoundOrFiller = - BoundOrFiller::Bound(UserBounds::new(Side::Some(-1), Side::Some(-1))); - - const BOF_F1_TO_END: BoundOrFiller = - BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Continue)); + fn bof_f1() -> BoundOrFiller { + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))) + } + fn bof_f2() -> BoundOrFiller { + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))) + } + fn bof_f3() -> BoundOrFiller { + BoundOrFiller::Bound(UserBounds::new(Side::Some(3), Side::Some(3))) + } + fn bof_r2_3() -> BoundOrFiller { + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(3))) + } + fn bof_neg1() -> BoundOrFiller { + BoundOrFiller::Bound(UserBounds::new(Side::Some(-1), Side::Some(-1))) + } + fn bof_f1_to_end() -> BoundOrFiller { + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Continue)) + } #[test] fn fwd_cut_one_field() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1]); + opt.bounds = UserBoundsList::new(vec![bof_f1()]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -179,7 +180,7 @@ mod tests { #[test] fn fwd_cut_multiple_fields() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1, BOF_F2]); + opt.bounds = UserBoundsList::new(vec![bof_f1(), bof_f2()]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -190,7 +191,7 @@ mod tests { #[test] fn fwd_support_ranges() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1, BOF_R2_3]); + opt.bounds = UserBoundsList::new(vec![bof_f1(), bof_r2_3()]); let mut input = b"a\nb\nc".as_slice(); let mut output = Vec::with_capacity(100); @@ -201,7 +202,7 @@ mod tests { #[test] fn fwd_supports_no_join() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1, BOF_F3]); + opt.bounds = UserBoundsList::new(vec![bof_f1(), bof_f3()]); opt.join = false; let mut input = b"a\nb\nc".as_slice(); @@ -213,7 +214,7 @@ mod tests { #[test] fn fwd_supports_no_right_bound() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1_TO_END]); + opt.bounds = UserBoundsList::new(vec![bof_f1_to_end()]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -224,7 +225,7 @@ mod tests { #[test] fn fwd_handle_out_of_bounds() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F3]); + opt.bounds = UserBoundsList::new(vec![bof_f3()]); opt.join = true; let mut input = b"a\nb".as_slice(); @@ -236,7 +237,7 @@ mod tests { #[test] fn fwd_ignore_last_empty() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F3]); + opt.bounds = UserBoundsList::new(vec![bof_f3()]); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -254,7 +255,7 @@ mod tests { #[test] fn cut_lines_handle_negative_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_NEG1]); + opt.bounds = UserBoundsList::new(vec![bof_neg1()]); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -265,7 +266,7 @@ mod tests { #[test] fn cut_lines_ignore_last_empty_when_using_positive_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F3]); + opt.bounds = UserBoundsList::new(vec![bof_f3()]); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -283,7 +284,7 @@ mod tests { #[test] fn cut_lines_ignore_last_empty_when_using_negative_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_NEG1]); + opt.bounds = UserBoundsList::new(vec![bof_neg1()]); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -301,7 +302,7 @@ mod tests { #[test] fn fwd_cut_zero_delimited() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1]); + opt.bounds = UserBoundsList::new(vec![bof_f1()]); opt.eol = EOL::Zero; opt.delimiter = String::from("\0"); @@ -314,7 +315,7 @@ mod tests { #[test] fn cut_lines_zero_delimited() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![BOF_F1]); + opt.bounds = UserBoundsList::new(vec![bof_f1()]); opt.eol = EOL::Zero; opt.delimiter = String::from("\0"); diff --git a/src/cut_str.rs b/src/cut_str.rs index 3867d4a..6417367 100644 --- a/src/cut_str.rs +++ b/src/cut_str.rs @@ -2,7 +2,7 @@ use anyhow::{bail, Result}; use std::io::{BufRead, Write}; use std::ops::Range; -use crate::bounds::{BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList}; +use crate::bounds::{BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList, UserBoundsTrait}; use crate::json::escape_json; use crate::options::{Opt, Trim}; use crate::read_utils::read_line_with_eol; diff --git a/src/fast_lane.rs b/src/fast_lane.rs index a93527e..0e7ab1f 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -1,4 +1,4 @@ -use crate::bounds::{BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList}; +use crate::bounds::{BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList, UserBoundsTrait}; use crate::options::{Opt, Trim, EOL}; use anyhow::Result; use bstr::ByteSlice; From 0032bb92a1e71cc94616143f063ceaa7a41ea545 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Tue, 13 Feb 2024 00:26:51 -0800 Subject: [PATCH 4/8] Refactor fast lane inner match to be more ergonomic --- src/fast_lane.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/fast_lane.rs b/src/fast_lane.rs index 0e7ab1f..7f092ca 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -87,17 +87,17 @@ fn cut_str_fast_lane( .iter() .enumerate() .try_for_each(|(bounds_idx, bof)| -> Result<()> { - let b = match bof { + match bof { BoundOrFiller::Filler(f) => { stdout.write_all(f.as_bytes())?; - return Ok(()); } - BoundOrFiller::Bound(b) => b, - }; - - let is_last = bounds_idx == bounds.len() - 1; + BoundOrFiller::Bound(b) => { + let is_last = bounds_idx == bounds.len() - 1; - output_parts(buffer, b, fields, stdout, is_last, opt) + output_parts(buffer, b, fields, stdout, is_last, opt)?; + } + }; + Ok(()) })?; } } From c2ae21fc911f26cb9076f14be0f5b2460e4ed32d Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Wed, 14 Feb 2024 23:49:13 -0800 Subject: [PATCH 5/8] Do not clone UserBoundsList in fast lane --- src/fast_lane.rs | 74 ++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/src/fast_lane.rs b/src/fast_lane.rs index 7f092ca..7b92e55 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -4,7 +4,6 @@ use anyhow::Result; use bstr::ByteSlice; use std::convert::TryFrom; use std::io::{self, BufRead}; -use std::str::FromStr; use std::{io::Write, ops::Range}; use bstr::io::BufReadExt; @@ -135,32 +134,19 @@ fn output_parts( } #[derive(Debug)] -pub struct FastOpt { +pub struct FastOpt<'a> { delimiter: u8, join: bool, eol: EOL, - bounds: UserBoundsList, + bounds: &'a UserBoundsList, only_delimited: bool, trim: Option, } -impl Default for FastOpt { - fn default() -> Self { - Self { - delimiter: b'\t', - join: false, - eol: EOL::Newline, - bounds: UserBoundsList::from_str("1:").unwrap(), - only_delimited: false, - trim: None, - } - } -} - -impl TryFrom<&Opt> for FastOpt { +impl<'a> TryFrom<&'a Opt> for FastOpt<'a> { type Error = &'static str; - fn try_from(value: &Opt) -> Result { + fn try_from(value: &'a Opt) -> Result { if value.delimiter.as_bytes().len() != 1 { return Err("Delimiter must be 1 byte wide for FastOpt"); } @@ -183,7 +169,7 @@ impl TryFrom<&Opt> for FastOpt { delimiter, join: value.join, eol: value.eol, - bounds: value.bounds.clone(), + bounds: &value.bounds, only_delimited: value.only_delimited, trim: value.trim, }) @@ -225,10 +211,17 @@ mod tests { use super::*; - fn make_fields_opt() -> FastOpt { + fn make_fields_opt(bounds_as_text: &str) -> FastOpt<'static> { + let boxed_bounds = Box::new(UserBoundsList::from_str(bounds_as_text).unwrap()); + let bounds: &'static mut UserBoundsList = Box::leak(boxed_bounds); + FastOpt { delimiter: b'-', - ..FastOpt::default() + join: false, + eol: EOL::Newline, + bounds, + only_delimited: false, + trim: None, } } @@ -237,7 +230,7 @@ mod tests { // read_and_cut_str is difficult to test, let's verify at least // that it reads the input and appears to call cut_str - let opt = make_fields_opt(); + let opt = make_fields_opt("1:"); let mut input = b"foo".as_slice(); let mut output = Vec::new(); read_and_cut_text_as_bytes(&mut input, &mut output, &opt).unwrap(); @@ -266,7 +259,7 @@ mod tests { #[test] fn cut_str_echo_non_delimited_strings() { - let opt = make_fields_opt(); + let opt = make_fields_opt("1:"); // non-empty line missing the delimiter let line = b"foo"; @@ -297,7 +290,7 @@ mod tests { #[test] fn cut_str_skip_non_delimited_strings_when_requested() { - let mut opt = make_fields_opt(); + let mut opt = make_fields_opt("1:"); opt.only_delimited = true; @@ -330,11 +323,10 @@ mod tests { #[test] fn cut_str_it_cut_a_field() { - let mut opt = make_fields_opt(); + let opt = make_fields_opt("1"); let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = UserBoundsList::from_str("1").unwrap(); cut_str_fast_lane( line, @@ -349,12 +341,11 @@ mod tests { #[test] fn cut_str_it_cut_with_negative_indices() { - let mut opt = make_fields_opt(); + // just one negative index + let opt = make_fields_opt("-1"); let line = b"a-b-c"; - // just one negative index - opt.bounds = UserBoundsList::from_str("-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -367,7 +358,7 @@ mod tests { assert_eq!(output, b"c\n".as_slice()); // multiple negative indices, in forward order - opt.bounds = UserBoundsList::from_str("-2,-1").unwrap(); + let opt = make_fields_opt("-2,-1"); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -380,7 +371,7 @@ mod tests { assert_eq!(output, b"bc\n".as_slice()); // multiple negative indices, in non-forward order - opt.bounds = UserBoundsList::from_str("-1,-2").unwrap(); + let opt = make_fields_opt("-1,-2"); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -395,7 +386,7 @@ mod tests { // mix positive and negative indices // (this is particularly useful to verify that we don't screw // up optimizations on last field to check) - opt.bounds = UserBoundsList::from_str("-1,1").unwrap(); + let opt = make_fields_opt("-1,1"); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( line, @@ -410,11 +401,10 @@ mod tests { #[test] fn cut_str_it_cut_consecutive_delimiters() { - let mut opt = make_fields_opt(); + let opt = make_fields_opt("1,3"); let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = UserBoundsList::from_str("1,3").unwrap(); cut_str_fast_lane( line, @@ -429,12 +419,11 @@ mod tests { #[test] fn cut_str_it_supports_zero_terminated_lines() { - let mut opt = make_fields_opt(); + let mut opt = make_fields_opt("2"); let (mut output, mut fields) = make_cut_str_buffers(); opt.eol = EOL::Zero; let line = b"a-b-c"; - opt.bounds = UserBoundsList::from_str("2").unwrap(); cut_str_fast_lane( line, @@ -449,11 +438,10 @@ mod tests { #[test] fn cut_str_it_join_fields() { - let mut opt = make_fields_opt(); + let mut opt = make_fields_opt("1,3"); let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = UserBoundsList::from_str("1,3").unwrap(); opt.join = true; cut_str_fast_lane( @@ -469,11 +457,10 @@ mod tests { #[test] fn cut_str_it_format_fields() { - let mut opt = make_fields_opt(); + let opt = make_fields_opt("{1} < {3} > {2}"); let (mut output, mut fields) = make_cut_str_buffers(); let line = b"a-b-c"; - opt.bounds = UserBoundsList::from_str("{1} < {3} > {2}").unwrap(); cut_str_fast_lane( line, @@ -488,12 +475,11 @@ mod tests { #[test] fn cut_str_it_trim_fields() { - let mut opt = make_fields_opt(); + let mut opt = make_fields_opt("1,3,-1"); let line = b"--a--b--c--"; // check Trim::Both opt.trim = Some(Trim::Both); - opt.bounds = UserBoundsList::from_str("1,3,-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( @@ -507,8 +493,8 @@ mod tests { assert_eq!(output, b"abc\n".as_slice()); // check Trim::Left + let mut opt = make_fields_opt("1,3,-3"); opt.trim = Some(Trim::Left); - opt.bounds = UserBoundsList::from_str("1,3,-3").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( @@ -522,8 +508,8 @@ mod tests { assert_eq!(output, b"abc\n".as_slice()); // check Trim::Right + let mut opt = make_fields_opt("3,5,-1"); opt.trim = Some(Trim::Right); - opt.bounds = UserBoundsList::from_str("3,5,-1").unwrap(); let (mut output, mut fields) = make_cut_str_buffers(); cut_str_fast_lane( From 3cc976a33d74e36b589487a217e5fb072d10cade Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Thu, 15 Feb 2024 00:15:35 -0800 Subject: [PATCH 6/8] Store is_last information into UserBounds --- src/bounds.rs | 16 ++++++++++++++-- src/cut_str.rs | 3 ++- src/fast_lane.rs | 30 ++++++++++++------------------ 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/bounds.rs b/src/bounds.rs index 2e8d946..238dcbb 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -120,20 +120,27 @@ impl From> for UserBoundsList { }; let mut rightmost_bound: Option = None; + let mut last_bound: Option<&mut UserBounds> = None; // This is risky, we could end up using last_interesting_field // internally. Didn't spend much time to figure out how to use // is_sortable without major refactoring. if ubl.is_sortable() { - ubl.list.iter().for_each(|bof| { + ubl.list.iter_mut().for_each(|bof| { if let BoundOrFiller::Bound(b) = bof { if rightmost_bound.is_none() || b.r > rightmost_bound.unwrap() { rightmost_bound = Some(b.r); } + + last_bound = Some(b); } }); } + last_bound + .expect("UserBoundsList must contain at least one UserBounds") + .is_last = true; + ubl.last_interesting_field = rightmost_bound.unwrap_or(Side::Continue); ubl } @@ -294,6 +301,7 @@ impl PartialOrd for Side { pub struct UserBounds { pub l: Side, pub r: Side, + pub is_last: bool, } impl fmt::Display for UserBounds { @@ -361,7 +369,11 @@ pub trait UserBoundsTrait { impl UserBoundsTrait for UserBounds { fn new(l: Side, r: Side) -> Self { - UserBounds { l, r } + UserBounds { + l, + r, + is_last: false, + } } /** * Check if a field is between the bounds. diff --git a/src/cut_str.rs b/src/cut_str.rs index 6417367..2072230 100644 --- a/src/cut_str.rs +++ b/src/cut_str.rs @@ -313,7 +313,8 @@ pub fn cut_str( b, BoundOrFiller::Bound(UserBounds { l: x, - r: y + r: y, + is_last: _, }) if x != y || x == &Side::Continue ) }) { diff --git a/src/fast_lane.rs b/src/fast_lane.rs index 7b92e55..9a87629 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -82,22 +82,17 @@ fn cut_str_fast_lane( stdout.write_all(buffer)?; } _ => { - bounds - .iter() - .enumerate() - .try_for_each(|(bounds_idx, bof)| -> Result<()> { - match bof { - BoundOrFiller::Filler(f) => { - stdout.write_all(f.as_bytes())?; - } - BoundOrFiller::Bound(b) => { - let is_last = bounds_idx == bounds.len() - 1; - - output_parts(buffer, b, fields, stdout, is_last, opt)?; - } - }; - Ok(()) - })?; + bounds.iter().try_for_each(|bof| -> Result<()> { + match bof { + BoundOrFiller::Filler(f) => { + stdout.write_all(f.as_bytes())?; + } + BoundOrFiller::Bound(b) => { + output_parts(buffer, b, fields, stdout, opt)?; + } + }; + Ok(()) + })?; } } @@ -114,7 +109,6 @@ fn output_parts( // where to find the parts inside `line` fields: &[Range], stdout: &mut W, - is_last: bool, opt: &FastOpt, ) -> Result<()> { let r = b.try_into_range(fields.len())?; @@ -126,7 +120,7 @@ fn output_parts( let field_to_print = output; stdout.write_all(field_to_print)?; - if opt.join && !(is_last) { + if opt.join && !b.is_last { stdout.write_all(&[opt.delimiter])?; } From 13306166d72cd1a49e022b15adad31016e446327 Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Thu, 15 Feb 2024 18:19:23 -0800 Subject: [PATCH 7/8] Remove UserBoundsList::new --- src/bounds.rs | 46 ++++++++++++++++++++++++---------------------- src/cut_lines.rs | 47 +++++++++++++++-------------------------------- src/options.rs | 4 ++-- 3 files changed, 41 insertions(+), 56 deletions(-) diff --git a/src/bounds.rs b/src/bounds.rs index 238dcbb..d0aec39 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -122,19 +122,20 @@ impl From> for UserBoundsList { let mut rightmost_bound: Option = None; let mut last_bound: Option<&mut UserBounds> = None; - // This is risky, we could end up using last_interesting_field - // internally. Didn't spend much time to figure out how to use - // is_sortable without major refactoring. - if ubl.is_sortable() { - ubl.list.iter_mut().for_each(|bof| { - if let BoundOrFiller::Bound(b) = bof { - if rightmost_bound.is_none() || b.r > rightmost_bound.unwrap() { - rightmost_bound = Some(b.r); - } - - last_bound = Some(b); + let is_sortable = ubl.is_sortable(); + + ubl.list.iter_mut().for_each(|bof| { + if let BoundOrFiller::Bound(b) = bof { + if rightmost_bound.is_none() || b.r > rightmost_bound.unwrap() { + rightmost_bound = Some(b.r); } - }); + + last_bound = Some(b); + } + }); + + if !is_sortable { + rightmost_bound = None; } last_bound @@ -149,15 +150,14 @@ impl From> for UserBoundsList { impl FromStr for UserBoundsList { type Err = anyhow::Error; fn from_str(s: &str) -> Result { + if s.trim().is_empty() { + bail!("UserBoundsList must contain at least one UserBounds"); + } Ok(parse_bounds_list(s)?.into()) } } impl UserBoundsList { - pub fn new(list: Vec) -> Self { - list.into() - } - /// Detect whether the list can be sorted. /// It can be sorted only if every bound /// has the same sign (all positive or all negative). @@ -423,14 +423,15 @@ impl UserBoundsTrait for UserBounds { /// # use tuc::bounds::{UserBounds, UserBoundsTrait}; /// # use std::ops::Range; /// # use tuc::bounds::Side; + /// # use std::str::FromStr; /// /// assert_eq!( - /// (UserBounds { l: Side::Some(1), r: Side::Some(2) }).try_into_range(5).unwrap(), + /// UserBounds::from_str("1:2").unwrap().try_into_range(5).unwrap(), /// Range { start: 0, end: 2} // 2, not 1, because it's exclusive /// ); /// /// assert_eq!( - /// (UserBounds { l: Side::Some(1), r: Side::Continue }).try_into_range(5).unwrap(), + /// UserBounds::from_str("1:").unwrap().try_into_range(5).unwrap(), /// Range { start: 0, end: 5} /// ); /// ``` @@ -769,9 +770,12 @@ mod tests { } #[test] - fn test_user_bounds_is_sortable() { - assert!(UserBoundsList::new(Vec::new()).is_sortable()); + fn test_user_bounds_cannot_be_empty() { + assert!(UserBoundsList::from_str("").is_err()); + } + #[test] + fn test_user_bounds_is_sortable() { assert!(UserBoundsList::from_str("1").unwrap().is_sortable()); assert!(UserBoundsList::from_str("1,2").unwrap().is_sortable()); @@ -787,8 +791,6 @@ mod tests { #[test] fn test_vec_of_bounds_is_sorted() { - assert!(UserBoundsList::from_str("").unwrap().is_sorted()); - assert!(UserBoundsList::from_str("1").unwrap().is_sorted()); assert!(UserBoundsList::from_str("1,2").unwrap().is_sorted()); diff --git a/src/cut_lines.rs b/src/cut_lines.rs index ffe9f0d..f781014 100644 --- a/src/cut_lines.rs +++ b/src/cut_lines.rs @@ -131,8 +131,10 @@ pub fn read_and_cut_lines( #[cfg(test)] mod tests { + use std::str::FromStr; + use crate::{ - bounds::{BoundsType, UserBounds, UserBoundsList}, + bounds::{BoundsType, UserBoundsList}, options::EOL, }; @@ -147,29 +149,10 @@ mod tests { } } - fn bof_f1() -> BoundOrFiller { - BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))) - } - - fn bof_f2() -> BoundOrFiller { - BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))) - } - fn bof_f3() -> BoundOrFiller { - BoundOrFiller::Bound(UserBounds::new(Side::Some(3), Side::Some(3))) - } - fn bof_r2_3() -> BoundOrFiller { - BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(3))) - } - fn bof_neg1() -> BoundOrFiller { - BoundOrFiller::Bound(UserBounds::new(Side::Some(-1), Side::Some(-1))) - } - fn bof_f1_to_end() -> BoundOrFiller { - BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Continue)) - } #[test] fn fwd_cut_one_field() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1()]); + opt.bounds = UserBoundsList::from_str("1").unwrap(); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -180,7 +163,7 @@ mod tests { #[test] fn fwd_cut_multiple_fields() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1(), bof_f2()]); + opt.bounds = UserBoundsList::from_str("1:2").unwrap(); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -191,7 +174,7 @@ mod tests { #[test] fn fwd_support_ranges() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1(), bof_r2_3()]); + opt.bounds = UserBoundsList::from_str("1,2:3").unwrap(); let mut input = b"a\nb\nc".as_slice(); let mut output = Vec::with_capacity(100); @@ -202,7 +185,7 @@ mod tests { #[test] fn fwd_supports_no_join() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1(), bof_f3()]); + opt.bounds = UserBoundsList::from_str("1,3").unwrap(); opt.join = false; let mut input = b"a\nb\nc".as_slice(); @@ -214,7 +197,7 @@ mod tests { #[test] fn fwd_supports_no_right_bound() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1_to_end()]); + opt.bounds = UserBoundsList::from_str("1:").unwrap(); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -225,7 +208,7 @@ mod tests { #[test] fn fwd_handle_out_of_bounds() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f3()]); + opt.bounds = UserBoundsList::from_str("3").unwrap(); opt.join = true; let mut input = b"a\nb".as_slice(); @@ -237,7 +220,7 @@ mod tests { #[test] fn fwd_ignore_last_empty() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f3()]); + opt.bounds = UserBoundsList::from_str("3").unwrap(); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -255,7 +238,7 @@ mod tests { #[test] fn cut_lines_handle_negative_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_neg1()]); + opt.bounds = UserBoundsList::from_str("-1").unwrap(); let mut input = b"a\nb".as_slice(); let mut output = Vec::with_capacity(100); @@ -266,7 +249,7 @@ mod tests { #[test] fn cut_lines_ignore_last_empty_when_using_positive_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f3()]); + opt.bounds = UserBoundsList::from_str("3").unwrap(); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -284,7 +267,7 @@ mod tests { #[test] fn cut_lines_ignore_last_empty_when_using_negative_idx() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_neg1()]); + opt.bounds = UserBoundsList::from_str("-1").unwrap(); let mut input1 = b"a\nb".as_slice(); let mut input2 = b"a\nb\n".as_slice(); @@ -302,7 +285,7 @@ mod tests { #[test] fn fwd_cut_zero_delimited() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1()]); + opt.bounds = UserBoundsList::from_str("1").unwrap(); opt.eol = EOL::Zero; opt.delimiter = String::from("\0"); @@ -315,7 +298,7 @@ mod tests { #[test] fn cut_lines_zero_delimited() { let mut opt = make_lines_opt(); - opt.bounds = UserBoundsList::new(vec![bof_f1()]); + opt.bounds = UserBoundsList::from_str("1").unwrap(); opt.eol = EOL::Zero; opt.delimiter = String::from("\0"); diff --git a/src/options.rs b/src/options.rs index 603b10c..c93eeb0 100644 --- a/src/options.rs +++ b/src/options.rs @@ -1,4 +1,4 @@ -use crate::bounds::{BoundOrFiller, BoundsType, UserBounds, UserBoundsList}; +use crate::bounds::{BoundsType, UserBoundsList}; use anyhow::Result; use std::str::FromStr; @@ -54,7 +54,7 @@ impl Default for Opt { Opt { delimiter: String::from("-"), eol: EOL::Newline, - bounds: UserBoundsList::new(vec![BoundOrFiller::Bound(UserBounds::default())]), + bounds: UserBoundsList::from_str("1:").unwrap(), bounds_type: BoundsType::Fields, only_delimited: false, greedy_delimiter: false, From 894ebe12f3aa94f220de6eb6947bbf654d1e1aae Mon Sep 17 00:00:00 2001 From: Riccardo Attilio Galli Date: Thu, 15 Feb 2024 18:53:34 -0800 Subject: [PATCH 8/8] Store delimiter positions with usize in fast lane --- src/fast_lane.rs | 59 ++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/src/fast_lane.rs b/src/fast_lane.rs index 9a87629..7f49d3b 100644 --- a/src/fast_lane.rs +++ b/src/fast_lane.rs @@ -3,8 +3,8 @@ use crate::options::{Opt, Trim, EOL}; use anyhow::Result; use bstr::ByteSlice; use std::convert::TryFrom; +use std::io::Write; use std::io::{self, BufRead}; -use std::{io::Write, ops::Range}; use bstr::io::BufReadExt; @@ -18,11 +18,12 @@ fn trim<'a>(buffer: &'a [u8], trim_kind: &Trim, delimiter: u8) -> &'a [u8] { } } +#[inline(always)] fn cut_str_fast_lane( initial_buffer: &[u8], opt: &FastOpt, stdout: &mut W, - fields: &mut Vec>, + fields: &mut Vec, last_interesting_field: Side, ) -> Result<()> { let mut buffer = initial_buffer; @@ -40,8 +41,6 @@ fn cut_str_fast_lane( let bounds = &opt.bounds; - let mut prev_field_start = 0; - let mut curr_field = 0; fields.clear(); @@ -49,10 +48,7 @@ fn cut_str_fast_lane( for i in memchr::memchr_iter(opt.delimiter, buffer) { curr_field += 1; - let (start, end) = (prev_field_start, i); // end exclusive - prev_field_start = i + 1; - - fields.push(Range { start, end }); + fields.push(i); if Side::Some(curr_field) == last_interesting_field { // We have no use for any other fields in this line @@ -65,20 +61,16 @@ fn cut_str_fast_lane( return Ok(()); } - // After the last loop ended, everything remaining is the field - // after the last delimiter (we want it), or "useless" fields after the - // last one that the user is interested in (and we can ignore them). if Side::Some(curr_field) != last_interesting_field { - fields.push(Range { - start: prev_field_start, - end: buffer.len(), - }); + // We reached the end of the line. Who knows, maybe + // the user is interested in this field too. + fields.push(buffer.len()); } let num_fields = fields.len(); match num_fields { - 1 if bounds.len() == 1 && fields[0].end == buffer.len() => { + 1 if bounds.len() == 1 && fields[0] == buffer.len() => { stdout.write_all(buffer)?; } _ => { @@ -107,14 +99,19 @@ fn output_parts( // which parts to print b: &UserBounds, // where to find the parts inside `line` - fields: &[Range], + fields: &[usize], stdout: &mut W, opt: &FastOpt, ) -> Result<()> { let r = b.try_into_range(fields.len())?; - let idx_start = fields[r.start].start; - let idx_end = fields[r.end - 1].end; + let idx_start = if r.start == 0 { + 0 + } else { + fields[r.start - 1] + 1 + }; + let idx_end = fields[r.end - 1]; + let output = &line[idx_start..idx_end]; let field_to_print = output; @@ -175,7 +172,7 @@ pub fn read_and_cut_text_as_bytes( stdout: &mut W, opt: &FastOpt, ) -> Result<()> { - let mut fields: Vec> = Vec::with_capacity(16); + let mut fields: Vec = Vec::with_capacity(16); let last_interesting_field = opt.bounds.last_interesting_field; @@ -245,7 +242,7 @@ mod tests { ); } - fn make_cut_str_buffers() -> (Vec, Vec>) { + fn make_cut_str_buffers() -> (Vec, Vec) { let output = Vec::new(); let fields = Vec::new(); (output, fields) @@ -451,6 +448,21 @@ mod tests { #[test] fn cut_str_it_format_fields() { + let opt = make_fields_opt("{2}"); + let (mut output, mut fields) = make_cut_str_buffers(); + + let line = b"a-b-c"; + + cut_str_fast_lane( + line, + &opt, + &mut output, + &mut fields, + opt.bounds.last_interesting_field, + ) + .unwrap(); + assert_eq!(output.to_str_lossy(), b"b\n".as_slice().to_str_lossy()); + let opt = make_fields_opt("{1} < {3} > {2}"); let (mut output, mut fields) = make_cut_str_buffers(); @@ -464,7 +476,10 @@ mod tests { opt.bounds.last_interesting_field, ) .unwrap(); - assert_eq!(output, b"a < c > b\n".as_slice()); + assert_eq!( + output.to_str_lossy(), + b"a < c > b\n".as_slice().to_str_lossy() + ); } #[test]