diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eebad5..8b7a26a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] +- feat: new option --json to format output as JSON array - fix: field formatting is now applied to field 1 even if it's the only one present and with no delimiters around diff --git a/README.md b/README.md index 8efa944..bc30c49 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ FLAGS: -h, --help Print this help and exit -m, --complement Invert fields (e.g. '2' becomes '1,3:') -j, --(no-)join Print selected parts with delimiter inbetween + --json Print fields as a JSON array of strings OPTIONS: -f, --fields Fields to keep, 1-indexed, comma separated. @@ -52,7 +53,7 @@ OPTIONS: Fields can be negative (-1 is the last field). [default 1:] - e.g. cutting on '-' the string 'a-b-c-d' + e.g. cutting the string 'a-b-c-d' on '-' -f 1 => a -f 1: => a-b-c-d -f 1:3 => a-b-c @@ -65,7 +66,7 @@ OPTIONS: You can also format the output using {} syntax e.g. - -f '["{1}", "{2}"]' => ["a", "b"] + -f '({1}, {2})' => (a, b) You can escape { and } using {{ and }}. @@ -131,6 +132,12 @@ cba ac ``` +```sh +# Emit JSON output +❯ echo "foo bar baz" | tuc -d ' ' --json +["foo","bar","baz"] +``` + ```sh # Delimiters can be any number of characters long ❯ echo "abc" | tuc -d '' -f 1,3 diff --git a/doc/tuc.1 b/doc/tuc.1 index 62ce3dd..04a170b 100644 --- a/doc/tuc.1 +++ b/doc/tuc.1 @@ -54,6 +54,9 @@ Invert fields (e.g.\ \[aq]2\[aq] becomes \[aq]1,3:\[aq]) .TP .B \-j, --(no-)join Print selected parts with delimiter in between +.TP +.B --json +Print fields as a JSON array of strings .SH OPTIONS .PP \f[B]-f\f[R], \f[B]--fields\f[R] [bounds] @@ -72,31 +75,31 @@ Print selected parts with delimiter in between .PP \ \ \ \ \ \ \ [default 1:] .PP -\ \ \ \ \ \ \ e.g.\ cutting on \[aq]-\[aq] the string \[aq]a-b-c-d\[aq] +\ \ \ \ \ \ \ e.g.\ cutting the string \[aq]a-b-c-d\[aq] on \[aq]-\[aq] .PD 0 .P .PD -\ \ \ \ \ \ \ \ \-f 1 => a +\ \ \ \ \ \ \ \ \ \f[V]-f 1 => a\f[R] .PD 0 .P .PD -\ \ \ \ \ \ \ \ \-f 1: => a-b-c-d +\ \ \ \ \ \ \ \ \ \f[V]-f 1: => a-b-c-d\f[R] .PD 0 .P .PD -\ \ \ \ \ \ \ \ \-f 1:3 => a-b-c +\ \ \ \ \ \ \ \ \ \f[V]-f 1:3 => a-b-c\f[R] .PD 0 .P .PD -\ \ \ \ \ \ \ \ \-f 3,2 => cb +\ \ \ \ \ \ \ \ \ \f[V]-f 3,2 => cb\f[R] .PD 0 .P .PD -\ \ \ \ \ \ \ \ \-f 3,1:2 => ca-b +\ \ \ \ \ \ \ \ \ \f[V]-f 3,1:2 => ca-b\f[R] .PD 0 .P .PD -\ \ \ \ \ \ \ \ \-f -3:-2 => b-c +\ \ \ \ \ \ \ \ \ \f[V]-f -3:-2 => b-c\f[R] .PP \ \ \ \ \ \ \ To re-apply the delimiter add -j, to replace .PD 0 @@ -112,8 +115,7 @@ Print selected parts with delimiter in between .PD 0 .P .PD -\ \ \ \ \ \ \ \ \ -f \[aq][\[dq]{1}\[dq], \[dq]{2}\[dq]]\[aq] => -[\[dq]a\[dq], \[dq]b\[dq]] +\ \ \ \ \ \ \ \ \ \f[V]-f \[aq]({1}, {2})\[aq] => (a, b)\f[R] .PP \ \ \ \ \ \ \ You can escape { and } using {{ and }}. .PP diff --git a/doc/tuc.1.md b/doc/tuc.1.md index bd79ff6..82a2ee0 100644 --- a/doc/tuc.1.md +++ b/doc/tuc.1.md @@ -46,6 +46,8 @@ FLAGS -j, \--(no-)join : Print selected parts with delimiter in between +\--json +: Print fields as a JSON array of strings OPTIONS ======= @@ -57,7 +59,7 @@ OPTIONS | [default 1:] -| e.g. cutting on \'-\' the string \'a-b-c-d\' +| e.g. cutting the string \'a-b-c-d\' on \'-\' | `-f 1 => a` | `-f 1: => a-b-c-d` | `-f 1:3 => a-b-c` @@ -70,7 +72,7 @@ OPTIONS | You can also format the output using {} syntax | e.g. -| -f \'[\"{1}\", \"{2}\"]\' => [\"a\", \"b\"] +| `-f '({1}, {2})' => (a, b)` | You can escape { and } using {{ and }}. diff --git a/src/bin/tuc.rs b/src/bin/tuc.rs index 6458227..83b7092 100644 --- a/src/bin/tuc.rs +++ b/src/bin/tuc.rs @@ -1,7 +1,7 @@ use anyhow::Result; use std::io::Write; use std::str::FromStr; -use tuc::bounds::{BoundsType, UserBoundsList}; +use tuc::bounds::{BoundOrFiller, BoundsType, UserBoundsList}; use tuc::cut_bytes::read_and_cut_bytes; use tuc::cut_lines::read_and_cut_lines; use tuc::cut_str::read_and_cut_str; @@ -33,6 +33,7 @@ FLAGS: -h, --help Print this help and exit -m, --complement Invert fields (e.g. '2' becomes '1,3:') -j, --(no-)join Print selected parts with delimiter in between + --json Print fields as a JSON array of strings OPTIONS: -f, --fields Fields to keep, 1-indexed, comma separated. @@ -40,20 +41,20 @@ OPTIONS: Fields can be negative (-1 is the last field). [default 1:] - e.g. cutting on '-' the string 'a-b-c-d' - -f 1 => a - -f 1: => a-b-c-d - -f 1:3 => a-b-c - -f 3,2 => cb - -f 3,1:2 => ca-b - -f -3:-2 => b-c + e.g. cutting the string 'a-b-c-d' on '-' + -f 1 => a + -f 1: => a-b-c-d + -f 1:3 => a-b-c + -f 3,2 => cb + -f 3,1:2 => ca-b + -f -3:-2 => b-c To re-apply the delimiter add -j, to replace it add -r (followed by the new delimiter). You can also format the output using {} syntax e.g. - -f '["{1}", "{2}"]' => ["a", "b"] + -f '({1}, {2})' => (a, b) You can escape { and } using {{ and }}. @@ -64,7 +65,8 @@ OPTIONS: -d, --delimiter Delimiter used by --fields to cut the text [default: \t] -e, --regex Use a regular expression as delimiter - -r, --replace-delimiter Replace the delimiter with the provided text + -r, --replace-delimiter Replace the delimiter with the provided text. + Implies --join -t, --trim Trim the delimiter (greedy). Valid values are (l|L)eft, (r|R)ight, (b|B)oth @@ -112,7 +114,7 @@ fn parse_args() -> Result { if bounds_type == BoundsType::Fields && (maybe_fields.is_none() || maybe_fields.as_ref().unwrap().0.is_empty()) { - eprintln!("tuc: invariant error. At least 1 field bound is expected with --fields"); + eprintln!("tuc: invariant error. At this point we expected to find at least 1 field bound"); std::process::exit(1); } @@ -125,24 +127,57 @@ fn parse_args() -> Result { }; let greedy_delimiter = pargs.contains(["-g", "--greedy-delimiter"]); - let replace_delimiter = pargs.opt_value_from_str(["-r", "--replace-delimiter"])?; + let mut replace_delimiter = pargs.opt_value_from_str(["-r", "--replace-delimiter"])?; + let has_json = pargs.contains("--json"); let has_join = pargs.contains(["-j", "--join"]); let has_no_join = pargs.contains("--no-join"); if has_join && has_no_join { - eprintln!("tuc: runtime error. You can't pass both --join and --no-join"); + eprintln!( + "tuc: runtime error. It's not possible to use --join and --no-join simultaneously" + ); std::process::exit(1); } - if replace_delimiter.is_some() && has_no_join { - eprintln!("tuc: runtime error. Since --replace implies --join, you can't pass --no-join"); + if has_json && has_no_join { + eprintln!("tuc: runtime error. Using both --json and --no-join is not permitted"); std::process::exit(1); } + if replace_delimiter.is_some() { + if has_no_join { + eprintln!("tuc: runtime error. You can't pass --no-join when using --replace, which implies --join"); + std::process::exit(1); + } else if has_json { + eprintln!("tuc: runtime error. The use of --replace with --json is not supported"); + std::process::exit(1); + } + } + + if bounds_type == BoundsType::Characters && has_no_join { + eprintln!( + "tuc: runtime error. Since --characters implies --join, you can't pass --no-join" + ); + std::process::exit(1); + } + + if has_json { + replace_delimiter = Some(",".to_owned()); + } + let join = has_join + || has_json || replace_delimiter.is_some() - || (bounds_type == BoundsType::Lines && !has_no_join); + || (bounds_type == BoundsType::Lines && !has_no_join) + || (bounds_type == BoundsType::Characters); + + if has_json && bounds_type != BoundsType::Characters && bounds_type != BoundsType::Fields { + eprintln!( + "tuc: runtime error. --json support is available only for --fields and --characters" + ); + std::process::exit(1); + } #[cfg(not(feature = "regex"))] let regex_bag = None; @@ -161,8 +196,24 @@ fn parse_args() -> Result { }), }); - if regex_bag.is_some() && !cfg!(feature = "regex") { - eprintln!("tuc: runtime error. This version of tuc was compiled without regex support"); + if regex_bag.is_some() && cfg!(not(feature = "regex")) { + eprintln!("tuc: invariant error. There should not be any regex when compiled without regex support"); + std::process::exit(1); + } + + let bounds = maybe_fields + .or(maybe_characters) + .or(maybe_bytes) + .or(maybe_lines) + .unwrap(); + + if has_json + && bounds + .0 + .iter() + .any(|s| matches!(s, BoundOrFiller::Filler(_))) + { + eprintln!("tuc: runtime error. Cannot format fields when using --json"); std::process::exit(1); } @@ -178,13 +229,10 @@ fn parse_args() -> Result { EOL::Newline }, join, + json: has_json, delimiter, bounds_type, - bounds: maybe_fields - .or(maybe_characters) - .or(maybe_bytes) - .or(maybe_lines) - .unwrap(), + bounds, replace_delimiter, trim: pargs.opt_value_from_str(["-t", "--trim"])?, regex_bag, diff --git a/src/bounds.rs b/src/bounds.rs index 512d697..b190314 100644 --- a/src/bounds.rs +++ b/src/bounds.rs @@ -1,5 +1,6 @@ use anyhow::{bail, Result}; use std::cmp::Ordering; +use std::convert::TryInto; use std::fmt; use std::ops::Range; use std::str::FromStr; @@ -170,6 +171,24 @@ impl UserBoundsList { pub fn is_forward_only(&self) -> bool { self.is_sortable() && self.is_sorted() && !self.has_negative_indices() } + + /** + * Create a new UserBoundsList with only the bounds (no fillers) + * and with every ranged bound converted into single slot bounds. + */ + pub fn unpack(&self, num_fields: usize) -> UserBoundsList { + UserBoundsList( + self.0 + .iter() + .filter_map(|x| match x { + BoundOrFiller::Filler(_) => None, + BoundOrFiller::Bound(b) => Some(b.unpack(num_fields)), + }) + .flatten() + .map(BoundOrFiller::Bound) + .collect(), + ) + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -297,6 +316,38 @@ impl UserBounds { _ => Ok(false), } } + + /** + * Transform a ranged bound into a list of one or more + * 1 slot bound + */ + pub fn unpack(&self, num_fields: usize) -> Vec { + let mut bounds = Vec::new(); + let n: i32 = num_fields + .try_into() + .expect("num_fields was bigger than expected"); + + let (start, end): (i32, i32) = match (self.l, self.r) { + (Side::Continue, Side::Continue) => (1, n), + (Side::Continue, Side::Some(right)) => { + (1, if right > 0 { right } else { n + 1 + right }) + } + (Side::Some(left), Side::Some(right)) => ( + if left > 0 { left } else { n + 1 + left }, + if right > 0 { right } else { n + 1 + right }, + ), + (Side::Some(left), Side::Continue) => (if left > 0 { left } else { n + 1 + left }, n), + }; + + for i in start..=end { + bounds.push(UserBounds { + l: Side::Some(i), + r: Side::Some(i), + }) + } + + bounds + } } impl Ord for UserBounds { @@ -569,6 +620,59 @@ mod tests { ); } + #[test] + fn test_unpack_bound() { + assert_eq!( + UserBounds::new(Side::Some(1), Side::Some(1)).unpack(2), + vec![UserBounds::new(Side::Some(1), Side::Some(1))], + ); + + assert_eq!( + UserBounds::new(Side::Some(1), Side::Continue).unpack(2), + vec![ + UserBounds::new(Side::Some(1), Side::Some(1)), + UserBounds::new(Side::Some(2), Side::Some(2)) + ], + ); + + assert_eq!( + UserBounds::new(Side::Continue, Side::Some(2)).unpack(2), + vec![ + UserBounds::new(Side::Some(1), Side::Some(1)), + UserBounds::new(Side::Some(2), Side::Some(2)) + ], + ); + + assert_eq!( + UserBounds::new(Side::Continue, Side::Continue).unpack(2), + vec![ + UserBounds::new(Side::Some(1), Side::Some(1)), + UserBounds::new(Side::Some(2), Side::Some(2)) + ], + ); + + assert_eq!( + UserBounds::new(Side::Some(-1), Side::Continue).unpack(2), + vec![UserBounds::new(Side::Some(2), Side::Some(2)),], + ); + + assert_eq!( + UserBounds::new(Side::Continue, Side::Some(-1)).unpack(2), + vec![ + UserBounds::new(Side::Some(1), Side::Some(1)), + UserBounds::new(Side::Some(2), Side::Some(2)) + ], + ); + + assert_eq!( + UserBounds::new(Side::Some(-2), Side::Some(-1)).unpack(2), + vec![ + UserBounds::new(Side::Some(1), Side::Some(1)), + UserBounds::new(Side::Some(2), Side::Some(2)) + ], + ); + } + #[test] fn test_user_bounds_is_sortable() { assert!(UserBoundsList(Vec::new()).is_sortable()); @@ -615,4 +719,26 @@ mod tests { .unwrap() .is_forward_only()); } + + #[test] + fn test_vec_of_bounds_can_unpack() { + assert_eq!( + UserBoundsList::from_str("1,:1,2:3,4:").unwrap().unpack(4).0, + vec![ + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(3), Side::Some(3))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(4), Side::Some(4))), + ] + ); + + assert_eq!( + UserBoundsList::from_str("a{1}b{2}c").unwrap().unpack(4).0, + vec![ + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), + ] + ); + } } diff --git a/src/cut_str.rs b/src/cut_str.rs index 38d4f09..29fa4ae 100644 --- a/src/cut_str.rs +++ b/src/cut_str.rs @@ -2,7 +2,10 @@ use anyhow::{bail, Result}; use std::io::{BufRead, Write}; use std::ops::Range; -use crate::bounds::{bounds_to_std_range, BoundOrFiller, BoundsType}; +use crate::bounds::{ + bounds_to_std_range, BoundOrFiller, BoundsType, Side, UserBounds, UserBoundsList, +}; +use crate::json::escape_json; use crate::options::{Opt, Trim}; use crate::read_utils::read_line_with_eol; @@ -115,7 +118,9 @@ fn compress_delimiter_with_regex<'a>( #[cfg(feature = "regex")] fn maybe_replace_delimiter<'a>(text: &'a str, opt: &Opt) -> std::borrow::Cow<'a, str> { - if let Some(new_delimiter) = opt.replace_delimiter.as_ref() { + if opt.bounds_type == BoundsType::Characters { + std::borrow::Cow::Borrowed(text) + } else if let Some(new_delimiter) = opt.replace_delimiter.as_ref() { if let Some(re_bag) = &opt.regex_bag { re_bag.normal.replace_all(text, new_delimiter) } else { @@ -128,7 +133,9 @@ fn maybe_replace_delimiter<'a>(text: &'a str, opt: &Opt) -> std::borrow::Cow<'a, #[cfg(not(feature = "regex"))] fn maybe_replace_delimiter<'a>(text: &'a str, opt: &Opt) -> std::borrow::Cow<'a, str> { - if let Some(new_delimiter) = opt.replace_delimiter.as_ref() { + if opt.bounds_type == BoundsType::Characters { + std::borrow::Cow::Borrowed(text) + } else if let Some(new_delimiter) = opt.replace_delimiter.as_ref() { std::borrow::Cow::Owned(text.replace(&opt.delimiter, new_delimiter)) } else { std::borrow::Cow::Borrowed(text) @@ -170,6 +177,18 @@ fn trim_regex<'a>(line: &'a str, trim_kind: &Trim, re: &Regex) -> &'a str { &line[idx_start..idx_end] } +macro_rules! write_maybe_as_json { + ($writer:ident, $to_print:ident, $as_json:expr) => {{ + if $as_json { + $writer.write_all(b"\"")?; + $writer.write_all(&escape_json(&$to_print).as_bytes())?; + $writer.write_all(b"\"")?; + } else { + $writer.write_all($to_print.as_bytes())?; + } + }}; +} + pub fn cut_str( line: &str, opt: &Opt, @@ -261,14 +280,49 @@ pub fn cut_str( bounds_as_ranges.drain(..1); } + if opt.only_delimited && bounds_as_ranges.len() == 1 { + // If there's only 1 field it means that there were no delimiters + // and when used alogside `only_delimited` we must skip the line + return Ok(()); + } + + if opt.json { + stdout.write_all(b"[")?; + } + + let _bounds: UserBoundsList; + let mut bounds = &opt.bounds; + + if opt.bounds_type == BoundsType::Characters && opt.replace_delimiter.is_some() { + // Unpack bounds such as 1:3 or 2: into single character bounds + // such as 1:1,2:2,3:3 etc... + // We need it to be able to insert a replace character between every field. + // It can cost quite a bit and is risky because it may end up creating a + // char vector of the whole input (then again -c with -r is quite the + // rare usage). + + // Start by checking if we actually need to rewrite the bounds + if bounds.0.iter().any(|b| { + matches!( + b, + BoundOrFiller::Bound(UserBounds { + l: x, + r: y + }) if x != y || x == &Side::Continue + ) + }) { + // Yep, there at least a range bound. Let's do it + _bounds = bounds.unpack(bounds_as_ranges.len()); + bounds = &_bounds; + } + } + match bounds_as_ranges.len() { - 1 if opt.only_delimited => (), - 1 if opt.bounds.0.len() == 1 => { - stdout.write_all(line.as_bytes())?; - stdout.write_all(eol)?; + 1 if bounds.0.len() == 1 => { + write_maybe_as_json!(stdout, line, opt.json); } _ => { - opt.bounds + bounds .0 .iter() .enumerate() @@ -281,25 +335,35 @@ pub fn cut_str( BoundOrFiller::Bound(b) => b, }; - let r_array = [bounds_to_std_range(bounds_as_ranges.len(), b)?]; - let mut r_iter = r_array.iter(); - let _complements; - let mut n_ranges = 1; + let mut r_array = vec![bounds_to_std_range(bounds_as_ranges.len(), b)?]; if opt.complement { - _complements = complement_std_range(bounds_as_ranges.len(), &r_array[0]); - r_iter = _complements.iter(); - n_ranges = _complements.len(); + r_array = complement_std_range(bounds_as_ranges.len(), &r_array[0]); + } + + if opt.json { + r_array = r_array + .iter() + .flat_map(|r| r.start..r.end) + .map(|i| Range { + start: i, + end: i + 1, + }) + .collect(); } + let r_iter = r_array.iter(); + let n_ranges = r_array.len(); + for (idx_r, r) in r_iter.enumerate() { let idx_start = bounds_as_ranges[r.start].start; let idx_end = bounds_as_ranges[r.end - 1].end; let output = &line[idx_start..idx_end]; - stdout.write_all(maybe_replace_delimiter(output, opt).as_bytes())?; + let field_to_print = maybe_replace_delimiter(output, opt); + write_maybe_as_json!(stdout, field_to_print, opt.json); - if opt.join && !(i == opt.bounds.0.len() - 1 && idx_r == n_ranges - 1) { + if opt.join && !(i == bounds.0.len() - 1 && idx_r == n_ranges - 1) { stdout.write_all( opt.replace_delimiter .as_ref() @@ -311,11 +375,15 @@ pub fn cut_str( Ok(()) })?; - - stdout.write_all(eol)?; } } + if opt.json { + stdout.write_all(b"]")?; + } + + stdout.write_all(eol)?; + Ok(()) } @@ -680,6 +748,23 @@ mod tests { assert_eq!(output, "🤩\n".as_bytes()); } + #[test] + fn cut_str_it_cut_characters_and_replace_the_delimiter() { + let mut opt = make_fields_opt(); + let (mut output, mut buffer1, mut buffer2) = make_cut_str_buffers(); + let eol = &[EOL::Newline as u8]; + + let line = "😁🤩😝😎"; + opt.bounds = UserBoundsList::from_str("1,2,3:4").unwrap(); + opt.bounds_type = BoundsType::Characters; + opt.delimiter = String::new(); + opt.replace_delimiter = Some("-".to_owned()); + opt.join = true; // implied when using BoundsType::Characters + + cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); + assert_eq!(&String::from_utf8_lossy(&output), "😁-🤩-😝-😎\n"); + } + #[test] fn cut_str_it_supports_zero_terminated_lines() { let mut opt = make_fields_opt(); @@ -900,4 +985,90 @@ mod tests { cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); assert_eq!(output, b"abc\n".as_slice()); } + + #[test] + fn cut_str_it_produce_json_output() { + let mut opt = make_fields_opt(); + opt.json = true; + opt.replace_delimiter = Some(",".to_owned()); + let (mut output, mut buffer1, mut buffer2) = make_cut_str_buffers(); + let eol = &[EOL::Newline as u8]; + + let line = "a-b-c"; + opt.bounds = UserBoundsList::from_str("1,3").unwrap(); + opt.join = true; + + cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); + assert_eq!( + output, + br#"["a","c"] +"# + .as_slice() + ); + } + + #[test] + fn cut_str_json_with_single_field_is_still_an_array() { + let mut opt = make_fields_opt(); + opt.json = true; + opt.replace_delimiter = Some(",".to_owned()); + let (mut output, mut buffer1, mut buffer2) = make_cut_str_buffers(); + let eol = &[EOL::Newline as u8]; + + let line = "a-b-c"; + opt.bounds = UserBoundsList::from_str("1").unwrap(); + opt.join = true; + + cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); + assert_eq!( + output, + br#"["a"] +"# + .as_slice() + ); + } + + #[test] + fn cut_str_complement_works_with_json() { + let mut opt = make_fields_opt(); + opt.json = true; + opt.replace_delimiter = Some(",".to_owned()); + opt.complement = true; + let (mut output, mut buffer1, mut buffer2) = make_cut_str_buffers(); + let eol = &[EOL::Newline as u8]; + + let line = "a-b-c"; + opt.bounds = UserBoundsList::from_str("2,2:3,-1").unwrap(); + opt.join = true; + + cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); + assert_eq!( + output, + br#"["a","c","a","a","b"] +"# + .as_slice() + ); + } + + #[test] + fn cut_str_json_on_characters_works() { + let mut opt = make_fields_opt(); + let (mut output, mut buffer1, mut buffer2) = make_cut_str_buffers(); + let eol = &[EOL::Newline as u8]; + + let line = "😁🤩😝😎"; + opt.bounds = UserBoundsList::from_str("1,2,3:4").unwrap(); + opt.bounds_type = BoundsType::Characters; + opt.delimiter = String::new(); + opt.join = true; + opt.json = true; + opt.replace_delimiter = Some(",".to_owned()); + + cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap(); + assert_eq!( + &String::from_utf8_lossy(&output), + r#"["\uD83D\uDE01","\uD83E\uDD29","\uD83D\uDE1D","\uD83D\uDE0E"] +"# + ); + } } diff --git a/src/json.rs b/src/json.rs new file mode 100644 index 0000000..019060b --- /dev/null +++ b/src/json.rs @@ -0,0 +1,42 @@ +use std::borrow::Cow; + +pub fn escape_json(input: &str) -> Cow<'_, str> { + let needs_escape = input.chars().any(|ch| { + matches!( + ch, + '"' | '\\' | '\x08' | '\x09' | '\x0A' | '\x0C' | '\x0D' + | '\u{0000}'..='\u{001F}' | '\u{007F}' | '\u{2028}' | '\u{2029}' + | '\u{10000}'.. + + ) + }); + + if !needs_escape { + return Cow::Borrowed(input); + } + + let mut output = String::with_capacity(input.len() + input.len().div_ceil(5)); + + for ch in input.chars() { + match ch { + '"' => output.push_str("\\\""), + '\\' => output.push_str("\\\\"), + '\x08' => output.push_str("\\b"), + '\x09' => output.push_str("\\t"), + '\x0A' => output.push_str("\\n"), + '\x0C' => output.push_str("\\f"), + '\x0D' => output.push_str("\\r"), + '\u{0000}'..='\u{001F}' | '\u{007F}' | '\u{2028}' | '\u{2029}' => { + output.push_str(&format!("\\u{:04X}", ch as u32)) + } + ch if ch > '\u{7F}' => { + for c in ch.encode_utf16(&mut [0; 2]) { + output.push_str(&format!("\\u{:04X}", c)); + } + } + _ => output.push(ch), + } + } + + Cow::Owned(output) +} diff --git a/src/lib.rs b/src/lib.rs index f546e92..15edbd3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,5 +2,6 @@ pub mod bounds; pub mod cut_bytes; pub mod cut_lines; pub mod cut_str; +mod json; pub mod options; mod read_utils; diff --git a/src/options.rs b/src/options.rs index ebceca5..0f630e9 100644 --- a/src/options.rs +++ b/src/options.rs @@ -33,6 +33,7 @@ pub struct Opt { pub version: bool, pub complement: bool, pub join: bool, + pub json: bool, #[cfg(feature = "regex")] pub regex_bag: Option, #[cfg(not(feature = "regex"))] @@ -54,6 +55,7 @@ impl Default for Opt { version: false, complement: false, join: false, + json: false, regex_bag: None, } } diff --git a/tests/cli.rs b/tests/cli.rs index 6c42516..e77b672 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -328,9 +328,9 @@ fn it_fails_if_both_join_and_nojoin_are_used_at_once() { let assert = cmd.args(["-j", "--no-join"]).write_stdin("foobar").assert(); - assert - .failure() - .stderr("tuc: runtime error. You can't pass both --join and --no-join\n"); + assert.failure().stderr( + "tuc: runtime error. It's not possible to use --join and --no-join simultaneously\n", + ); } #[test] @@ -342,9 +342,9 @@ fn it_fails_if_both_replace_and_nojoin_are_used_at_once() { .write_stdin("foobar") .assert(); - assert - .failure() - .stderr("tuc: runtime error. Since --replace implies --join, you can't pass --no-join\n"); + assert.failure().stderr( + "tuc: runtime error. You can't pass --no-join when using --replace, which implies --join\n", + ); } #[cfg(feature = "regex")] @@ -376,3 +376,88 @@ fn does_not_panic_if_attemtping_to_use_regex_arg_with_noregex_build() { "tuc: unexpected arguments [\"-e\", \".\"]\nTry 'tuc --help' for more information.\n", ); } + +#[test] +fn it_emit_output_as_json() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd + .args(["--json", "-d", "/", "-f", "1,2,1:3"]) + .write_stdin("a/b/c/d") + .assert(); + + assert.success().stdout( + r#"["a","b","a","b","c"] +"#, + ); +} + +#[test] +fn it_emit_output_as_json_even_when_cutting_on_chars() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd + .args(["--json", "-c", "1,2,1:3"]) + .write_stdin("abcd") + .assert(); + + assert.success().stdout( + r#"["a","b","a","b","c"] +"#, + ); +} + +#[test] +fn it_does_not_allow_to_replace_delimiter_with_json() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd.args(["--json", "-r", "x"]).assert(); + + assert + .failure() + .stderr("tuc: runtime error. The use of --replace with --json is not supported\n"); +} + +#[test] +fn it_is_not_allowed_to_use_character_with_nojoin() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd.args(["-c", "1", "--no-join"]).assert(); + + assert.failure().stderr( + "tuc: runtime error. Since --characters implies --join, you can\'t pass --no-join\n", + ); +} + +#[test] +fn it_does_not_support_json_on_lines() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd.args(["-l", "1", "--json"]).assert(); + + assert.failure().stderr( + "tuc: runtime error. --json support is available only for --fields and --characters\n", + ); +} + +#[test] +fn it_does_not_support_json_on_bytes() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd.args(["-b", "1", "--json"]).assert(); + + assert.failure().stderr( + "tuc: runtime error. --json support is available only for --fields and --characters\n", + ); +} + +#[test] +fn it_cannot_format_fields_alongside_json() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + + let assert = cmd.args(["-f", "a{1}b", "--json"]).assert(); + + assert + .failure() + .stderr("tuc: runtime error. Cannot format fields when using --json\n"); +}