Skip to content

Commit

Permalink
Replace delimiters in characters as we do for fields
Browse files Browse the repository at this point in the history
  • Loading branch information
riquito committed Dec 12, 2023
1 parent b4c31c7 commit 6772032
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 10 deletions.
126 changes: 126 additions & 0 deletions src/bounds.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use anyhow::{bail, Result};
use std::cmp::Ordering;
use std::convert::TryInto;
use std::fmt;
use std::ops::Range;
use std::str::FromStr;
Expand Down Expand Up @@ -170,6 +171,24 @@ impl UserBoundsList {
pub fn is_forward_only(&self) -> bool {
    // Ordering checks come first; a list that is not sortable or not
    // sorted is rejected without looking at the sign of its indices.
    if !self.is_sortable() || !self.is_sorted() {
        return false;
    }

    // Sorted lists still qualify only when no negative index is present.
    !self.has_negative_indices()
}

/**
 * Build a new UserBoundsList that keeps only the bounds (fillers are
 * dropped), with every bound expanded through `UserBounds::unpack`
 * into single slot bounds. The original order is preserved.
 *
 * `num_fields` is forwarded unchanged to `UserBounds::unpack`.
 */
pub fn unpack(&self, num_fields: usize) -> UserBoundsList {
    let mut single_slots = Vec::new();

    for item in &self.0 {
        // Fillers carry no bound, so they are not part of the result.
        if let BoundOrFiller::Bound(bound) = item {
            single_slots.extend(
                bound
                    .unpack(num_fields)
                    .into_iter()
                    .map(BoundOrFiller::Bound),
            );
        }
    }

    UserBoundsList(single_slots)
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
Expand Down Expand Up @@ -297,6 +316,38 @@ impl UserBounds {
_ => Ok(false),
}
}

/**
* Transform a ranged bound into a list of one or more
* 1 slot bound
*/
pub fn unpack(&self, num_fields: usize) -> Vec<UserBounds> {
let mut bounds = Vec::new();
let n: i32 = num_fields
.try_into()
.expect("num_fields was bigger than expected");

let (start, end): (i32, i32) = match (self.l, self.r) {
(Side::Continue, Side::Continue) => (1, n),
(Side::Continue, Side::Some(right)) => {
(1, if right > 0 { right } else { n + 1 + right })
}
(Side::Some(left), Side::Some(right)) => (
if left > 0 { left } else { n + 1 + left },
if right > 0 { right } else { n + 1 + right },
),
(Side::Some(left), Side::Continue) => (if left > 0 { left } else { n + 1 + left }, n),
};

for i in start..=end {
bounds.push(UserBounds {
l: Side::Some(i),
r: Side::Some(i),
})
}

bounds
}
}

impl Ord for UserBounds {
Expand Down Expand Up @@ -569,6 +620,59 @@ mod tests {
);
}

#[test]
fn test_unpack_bound() {
    // Shorthand for the single slot bound `i:i`.
    let slot = |i: i32| UserBounds::new(Side::Some(i), Side::Some(i));

    // (left side, right side, expected result when unpacking over 2 fields)
    let cases = vec![
        (Side::Some(1), Side::Some(1), vec![slot(1)]),
        (Side::Some(1), Side::Continue, vec![slot(1), slot(2)]),
        (Side::Continue, Side::Some(2), vec![slot(1), slot(2)]),
        (Side::Continue, Side::Continue, vec![slot(1), slot(2)]),
        (Side::Some(-1), Side::Continue, vec![slot(2)]),
        (Side::Continue, Side::Some(-1), vec![slot(1), slot(2)]),
        (Side::Some(-2), Side::Some(-1), vec![slot(1), slot(2)]),
    ];

    for (idx, (left, right, expected)) in cases.into_iter().enumerate() {
        assert_eq!(
            UserBounds::new(left, right).unpack(2),
            expected,
            "case #{}",
            idx
        );
    }
}

#[test]
fn test_user_bounds_is_sortable() {
assert!(UserBoundsList(Vec::new()).is_sortable());
Expand Down Expand Up @@ -615,4 +719,26 @@ mod tests {
.unwrap()
.is_forward_only());
}

#[test]
fn test_vec_of_bounds_can_unpack() {
    // Shorthand for a list item holding the single slot bound `i:i`.
    let slot = |i: i32| BoundOrFiller::Bound(UserBounds::new(Side::Some(i), Side::Some(i)));

    // Ranged and open-ended bounds are expanded one slot at a time.
    let from_ranges = UserBoundsList::from_str("1,:1,2:3,4:").unwrap().unpack(4);
    assert_eq!(
        from_ranges.0,
        vec![slot(1), slot(1), slot(2), slot(3), slot(4)]
    );

    // Fillers (the text around `{1}` and `{2}`) are dropped.
    let from_format = UserBoundsList::from_str("a{1}b{2}c").unwrap().unpack(4);
    assert_eq!(from_format.0, vec![slot(1), slot(2)]);
}
}
44 changes: 34 additions & 10 deletions src/cut_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use anyhow::{bail, Result};
use std::io::{BufRead, Write};
use std::ops::Range;

use crate::bounds::{bounds_to_std_range, BoundOrFiller, BoundsType};
use crate::bounds::{
bounds_to_std_range, BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList,
};
use crate::json::escape_json;
use crate::options::{Opt, Trim};
use crate::read_utils::read_line_with_eol;
Expand Down Expand Up @@ -288,12 +290,39 @@ pub fn cut_str<W: Write>(
stdout.write_all(b"[")?;
}

let _bounds: UserBoundsList;
let mut bounds = &opt.bounds;

if opt.bounds_type == BoundsType::Characters && opt.replace_delimiter.is_some() {
// Unpack bounds such as 1:3 or 2: into single character bounds
// such as 1:1,2:2,3:3 etc...
// We need it to be able to insert a replace character between every field.
// It can cost quite a bit and is risky because it may end up creating a
// char vector of the whole input (then again -c with -r is quite the
// rare usage).

// Start by checking if we actually need to rewrite the bounds
if bounds.0.iter().any(|b| {
matches!(
b,
BoundOrFiller::Bound(UserBounds {
l: x,
r: y
}) if x != y || x == &Side::Continue
)
}) {
// Yep, there is at least one range bound. Let's do it
_bounds = bounds.unpack(bounds_as_ranges.len());
bounds = &_bounds;
}
}

match bounds_as_ranges.len() {
1 if opt.bounds.0.len() == 1 => {
1 if bounds.0.len() == 1 => {
write_maybe_as_json!(stdout, line, opt.json);
}
_ => {
opt.bounds
bounds
.0
.iter()
.enumerate()
Expand Down Expand Up @@ -334,7 +363,7 @@ pub fn cut_str<W: Write>(
let field_to_print = maybe_replace_delimiter(output, opt);
write_maybe_as_json!(stdout, field_to_print, opt.json);

if opt.join && !(i == opt.bounds.0.len() - 1 && idx_r == n_ranges - 1) {
if opt.join && !(i == bounds.0.len() - 1 && idx_r == n_ranges - 1) {
stdout.write_all(
opt.replace_delimiter
.as_ref()
Expand Down Expand Up @@ -733,12 +762,7 @@ mod tests {
opt.join = true; // implied when using BoundsType::Characters

cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap();

// In theory between 3:4 there is the (empty) delimiter, and we
// should replace it. I think that for Characters it makes more sense
// to replace only the delimiters between the selected bounds
// (for BoundsType::FIELDS instead we replace inside a ranged bound too).
assert_eq!(&String::from_utf8_lossy(&output), "😁-🀩-😝😎\n");
assert_eq!(&String::from_utf8_lossy(&output), "😁-🀩-😝-😎\n");
}

#[test]
Expand Down

0 comments on commit 6772032

Please sign in to comment.