diff --git a/src/cmd/slice.rs b/src/cmd/slice.rs
index 1d8c16845..166ba93be 100644
--- a/src/cmd/slice.rs
+++ b/src/cmd/slice.rs
@@ -30,6 +30,7 @@ slice options:
                            The value is the field value. The output is a
                            JSON array. If --no-headers is set, then
                            the keys are the column indices (zero-based).
+    --invert               Slice all records EXCEPT those in the specified range.
 
 Examples:
   # Slice from the 3rd record to the end
@@ -46,6 +47,9 @@ Examples:
   # Slice the last 10 records
   qsv slice -s -10 data.csv
 
+  # Get everything except the last 10 records
+  qsv slice -s -10 --invert data.csv
+
   # Slice the first three records of the last 10 records
   qsv slice -s -10 -l 3 data.csv
 
@@ -60,6 +64,9 @@ Examples:
   qsv slice -s 9 -e 19 --json data.csv
   qsv slice -s 9 -l 10 --json data.csv
 
+  # Slice records 1 to 9 and 20 to the end as JSON
+  qsv slice -s 9 -l 10 --invert --json data.csv
+
 Common options:
     -h, --help             Display this message
     -o, --output <file>    Write output to <file> instead of stdout.
@@ -92,6 +99,7 @@ struct Args {
     flag_output:     Option<String>,
     flag_no_headers: bool,
     flag_delimiter:  Option<Delimiter>,
+    flag_invert:     bool,
 }
 
 pub fn run(argv: &[&str]) -> CliResult<()> {
@@ -111,11 +119,19 @@ impl Args {
         let (start, end) = self.range()?;
         if self.flag_json {
             let headers = rdr.byte_headers()?.clone();
-            let records = rdr
-                .byte_records()
-                .skip(start)
-                .take(end - start)
-                .map(|r| r.unwrap());
+            let invert = self.flag_invert;
+            let records = rdr
+                .byte_records()
+                .enumerate()
+                // A plain (non-inverted) slice needs nothing at or past `end`,
+                // so stop reading there instead of scanning the whole file.
+                .take_while(move |(i, _)| invert || *i < end)
+                // `outside` means "not in [start, end)": keep exactly those
+                // rows when inverting, and the complement otherwise.
+                .filter_map(move |(i, r)| {
+                    let outside = i < start || i >= end;
+                    (invert == outside).then(|| r.unwrap())
+                });
             util::write_json(
                 self.flag_output.as_ref(),
                 self.flag_no_headers,
@@ -125,8 +141,17 @@ impl Args {
         } else {
             let mut wtr = self.wconfig().writer()?;
             self.rconfig().write_headers(&mut rdr, &mut wtr)?;
-            for r in rdr.byte_records().skip(start).take(end - start) {
-                wtr.write_byte_record(&r?)?;
+
+            for (i, r) in rdr.byte_records().enumerate() {
+                if !self.flag_invert && i >= end {
+                    // A plain slice is complete once `end` is reached.
+                    break;
+                }
+                // `i < start || i >= end` is "outside the range"; write the row
+                // when that matches --invert.
+                if self.flag_invert == (i < start || i >= end) {
+                    wtr.write_byte_record(&r?)?;
+                }
             }
             Ok(wtr.flush()?)
         }
@@ -134,27 +159,89 @@ impl Args {
 
+    /// Slices using the CSV index: seeks straight to the rows that are needed
+    /// instead of scanning from the top. With --invert, writes the rows before
+    /// the range followed by the rows after it.
     fn with_index(&self, mut indexed_file: Indexed<fs::File, fs::File>) -> CliResult<()> {
         let (start, end) = self.range()?;
-        if end - start == 0 {
+        if end - start == 0 && !self.flag_invert {
             return Ok(());
         }
-        indexed_file.seek(start as u64)?;
+
+        // --invert keeps rows [0, head_len) and [tail_start, total_rows). `end` may
+        // be larger than the row count (e.g. an open-ended range like `-s -10`), so
+        // clamp both bounds to the file before subtracting or seeking.
+        let inverted = if self.flag_invert {
+            let total_rows = util::count_rows(&self.rconfig())? as usize;
+            let tail_start = end.min(total_rows);
+            Some((start.min(tail_start), tail_start, total_rows))
+        } else {
+            None
+        };
+
         if self.flag_json {
             let headers = indexed_file.byte_headers()?.clone();
-            let records = indexed_file
-                .byte_records()
-                .take(end - start)
-                .map(|r| r.unwrap());
-            util::write_json(
-                self.flag_output.as_ref(),
-                self.flag_no_headers,
-                &headers,
-                records,
-            )
+            if let Some((head_len, tail_start, total_rows)) = inverted {
+                let mut records = Vec::with_capacity(head_len + (total_rows - tail_start));
+                // Get records before start
+                if head_len > 0 {
+                    indexed_file.seek(0)?;
+                    for r in indexed_file.byte_records().take(head_len) {
+                        records.push(r?);
+                    }
+                }
+
+                // Get records after end; skip the seek when nothing is left, since
+                // the row count is past the last record (presumably rejected by seek).
+                if tail_start < total_rows {
+                    indexed_file.seek(tail_start as u64)?;
+                    for r in indexed_file.byte_records().take(total_rows - tail_start) {
+                        records.push(r?);
+                    }
+                }
+                util::write_json(
+                    self.flag_output.as_ref(),
+                    self.flag_no_headers,
+                    &headers,
+                    records.into_iter(),
+                )
+            } else {
+                indexed_file.seek(start as u64)?;
+                let records = indexed_file
+                    .byte_records()
+                    .take(end - start)
+                    .map(|r| r.unwrap());
+                util::write_json(
+                    self.flag_output.as_ref(),
+                    self.flag_no_headers,
+                    &headers,
+                    records,
+                )
+            }
         } else {
             let mut wtr = self.wconfig().writer()?;
             self.rconfig().write_headers(&mut *indexed_file, &mut wtr)?;
-            for r in indexed_file.byte_records().take(end - start) {
-                wtr.write_byte_record(&r?)?;
+
+            if let Some((head_len, tail_start, total_rows)) = inverted {
+                // Get records before start
+                if head_len > 0 {
+                    indexed_file.seek(0)?;
+                    for r in indexed_file.byte_records().take(head_len) {
+                        wtr.write_byte_record(&r?)?;
+                    }
+                }
+
+                // Get records after end; skip the seek when nothing is left, since
+                // the row count is past the last record (presumably rejected by seek).
+                if tail_start < total_rows {
+                    indexed_file.seek(tail_start as u64)?;
+                    for r in indexed_file.byte_records().take(total_rows - tail_start) {
+                        wtr.write_byte_record(&r?)?;
+                    }
+                }
+            } else {
+                indexed_file.seek(start as u64)?;
+                for r in indexed_file.byte_records().take(end - start) {
+                    wtr.write_byte_record(&r?)?;
+                }
             }
             Ok(wtr.flush()?)
         }
diff --git a/tests/test_slice.rs b/tests/test_slice.rs
index b8cbbf27e..ea334dc02 100644
--- a/tests/test_slice.rs
+++ b/tests/test_slice.rs
@@ -308,3 +308,192 @@ fn slice_neg_index_withindex() {
 fn slice_neg_index_no_headers_withindex() {
     test_index("slice_neg_index_no_headers_withindex", -2, "d", false, true);
 }
+
+/// Runs `qsv slice --invert` over the fixture built by `setup` and checks what
+/// is left.
+///
+/// `start`/`end` are forwarded as `--start` and `--end`; with `as_len` the end
+/// is sent as `--len` instead (`end - start`, or `end` itself when `start` is
+/// negative). `json_output` adds `--json` and compares the file it writes.
+fn test_slice_invert(
+    name: &str,
+    start: Option<isize>,
+    end: Option<usize>,
+    expected: &[&str],
+    headers: bool,
+    use_index: bool,
+    as_len: bool,
+    json_output: bool,
+) {
+    let (wrk, mut cmd) = setup(name, headers, use_index);
+    if let Some(start) = start {
+        cmd.arg("--start").arg(&start.to_string());
+    }
+    if let Some(end) = end {
+        if as_len {
+            let start = start.unwrap_or(0);
+            if start < 0 {
+                cmd.arg("--len").arg(&end.to_string());
+            } else {
+                cmd.arg("--len")
+                    .arg(&(end - start.unsigned_abs()).to_string());
+            }
+        } else {
+            cmd.arg("--end").arg(&end.to_string());
+        }
+    }
+    if !headers {
+        cmd.arg("--no-headers");
+    }
+    cmd.arg("--invert");
+
+    if json_output {
+        let output_file = wrk.path("output.json").to_string_lossy().to_string();
+
+        cmd.arg("--json").args(&["--output", &output_file]);
+
+        wrk.assert_success(&mut cmd);
+
+        let gots = wrk.read_to_string(&output_file);
+        let gotj: serde_json::Value = serde_json::from_str(&gots).unwrap();
+        let got = gotj.to_string();
+
+        let expected_vec = expected
+            .iter()
+            .map(|&s| {
+                if headers {
+                    format!("{{\"header\":\"{}\"}}", s)
+                } else {
+                    format!("{{\"0\":\"{}\"}}", s)
+                }
+            })
+            .collect::<Vec<String>>();
+        let expected = format!("[{}]", expected_vec.join(","));
+
+        assert_eq!(got, expected);
+    } else {
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let mut expected = expected
+            .iter()
+            .map(|&s| vec![s.to_owned()])
+            .collect::<Vec<Vec<String>>>();
+        if headers {
+            expected.insert(0, svec!["header"]);
+        }
+        assert_eq!(got, expected);
+    }
+}
+
+#[test]
+fn slice_invert_simple() {
+    test_slice_invert(
+        "slice_invert_simple",
+        Some(0),
+        Some(1),
+        &["b", "c", "d", "e"],
+        true,
+        false,
+        false,
+        false,
+    );
+}
+
+// Regression: an open-ended inverted range must not underflow on the indexed path.
+#[test]
+fn slice_invert_negative_with_index() {
+    test_slice_invert(
+        "slice_invert_negative_with_index",
+        Some(-2),
+        None,
+        &["a", "b", "c"],
+        true,
+        true,
+        false,
+        false,
+    );
+}
+
+// Regression: a range ending on the last row must not seek past the index.
+#[test]
+fn slice_invert_to_end_with_index() {
+    test_slice_invert(
+        "slice_invert_to_end_with_index",
+        Some(3),
+        Some(5),
+        &["a", "b", "c"],
+        true,
+        true,
+        false,
+        false,
+    );
+}
+
+#[test]
+fn slice_invert_middle() {
+    test_slice_invert(
+        "slice_invert_middle",
+        Some(1),          // start: zero-based, first row removed
+        Some(3),          // end: exclusive, first row kept again
+        &["a", "d", "e"], // rows expected to remain
+        true,             // headers
+        false,            // use_index
+        false,            // as_len
+        false,            // json_output
+    );
+}
+
+#[test]
+fn slice_invert_with_index() {
+    test_slice_invert(
+        "slice_invert_with_index",
+        Some(1),          // start: zero-based, first row removed
+        Some(3),          // end: exclusive, first row kept again
+        &["a", "d", "e"], // rows expected to remain
+        true,             // headers
+        true,             // use_index: exercise the indexed code path
+        false,            // as_len
+        false,            // json_output
+    );
+}
+
+#[test]
+fn slice_invert_json() {
+    test_slice_invert(
+        "slice_invert_json",
+        Some(1),          // start: zero-based, first row removed
+        Some(3),          // end: exclusive, first row kept again
+        &["a", "d", "e"], // rows expected to remain
+        true,             // headers
+        false,            // use_index
+        false,            // as_len
+        true,             // json_output: compare the --json array instead of CSV
+    );
+}
+
+#[test]
+fn slice_invert_negative() {
+    test_slice_invert(
+        "slice_invert_negative",
+        Some(-2),         // start: negative, counted from the end (last two rows)
+        None,             // end: not passed, range runs to the last row
+        &["a", "b", "c"], // rows expected to remain
+        true,             // headers
+        false,            // use_index
+        false,            // as_len
+        false,            // json_output
+    );
+}
+
+#[test]
+fn slice_invert_with_len() {
+    test_slice_invert(
+        "slice_invert_with_len",
+        Some(1),               // start: zero-based, first row removed
+        Some(2),               // end: sent as --len (end - start = 1) since as_len is set
+        &["a", "c", "d", "e"], // rows expected to remain
+        true,                  // headers
+        false,                 // use_index
+        true,                  // as_len
+        false,                 // json_output
+    );
+}