Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

slice: add --invert option #2298

Merged
merged 2 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 71 additions & 16 deletions src/cmd/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ slice options:
The value is the field value. The output is a
JSON array. If --no-headers is set, then
the keys are the column indices (zero-based).
--invert slice all records EXCEPT those in the specified range.

Examples:
# Slice from the 3rd record to the end
Expand All @@ -46,6 +47,9 @@ Examples:
# Slice the last 10 records
qsv slice -s -10 data.csv

# Get everything except the last 10 records
qsv slice -s -10 --invert data.csv

# Slice the first three records of the last 10 records
qsv slice -s -10 -l 3 data.csv

Expand All @@ -60,6 +64,9 @@ Examples:
qsv slice -s 9 -e 19 --json data.csv
qsv slice -s 9 -l 10 --json data.csv

# Slice records 1 to 9 and 20 to the end as JSON
qsv slice -s 9 -l 10 --invert --json data.csv

Common options:
-h, --help Display this message
-o, --output <file> Write output to <file> instead of stdout.
Expand Down Expand Up @@ -92,6 +99,7 @@ struct Args {
flag_output: Option<String>,
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
flag_invert: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand All @@ -111,11 +119,18 @@ impl Args {
let (start, end) = self.range()?;
if self.flag_json {
let headers = rdr.byte_headers()?.clone();
let records = rdr
.byte_records()
.skip(start)
.take(end - start)
.map(|r| r.unwrap());
let records = rdr.byte_records().enumerate().filter_map(move |(i, r)| {
let should_include = if self.flag_invert {
i < start || i >= end
} else {
i >= start && i < end
};
if should_include {
Some(r.unwrap())
} else {
None
}
});
util::write_json(
self.flag_output.as_ref(),
self.flag_no_headers,
Expand All @@ -125,36 +140,76 @@ impl Args {
} else {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut rdr, &mut wtr)?;
for r in rdr.byte_records().skip(start).take(end - start) {
wtr.write_byte_record(&r?)?;

for (i, r) in rdr.byte_records().enumerate() {
if self.flag_invert == (i < start || i >= end) {
wtr.write_byte_record(&r?)?;
}
}
Ok(wtr.flush()?)
}
}

fn with_index(&self, mut indexed_file: Indexed<fs::File, fs::File>) -> CliResult<()> {
let (start, end) = self.range()?;
if end - start == 0 {
if end - start == 0 && !self.flag_invert {
return Ok(());
}
indexed_file.seek(start as u64)?;

if self.flag_json {
let headers = indexed_file.byte_headers()?.clone();
let records = indexed_file
.byte_records()
.take(end - start)
.map(|r| r.unwrap());
let total_rows = util::count_rows(&self.rconfig())?;
let records = if self.flag_invert {
let mut records: Vec<csv::ByteRecord> =
Vec::with_capacity(start + (total_rows as usize - end));
// Get records before start
indexed_file.seek(0)?;
for r in indexed_file.byte_records().take(start) {
records.push(r.unwrap());
}

// Get records after end
indexed_file.seek(end as u64)?;
for r in indexed_file.byte_records().take(total_rows as usize - end) {
records.push(r.unwrap());
}
records
} else {
indexed_file.seek(start as u64)?;
indexed_file
.byte_records()
.take(end - start)
.map(|r| r.unwrap())
.collect::<Vec<_>>()
};
util::write_json(
self.flag_output.as_ref(),
self.flag_no_headers,
&headers,
records,
records.into_iter(),
)
} else {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut *indexed_file, &mut wtr)?;
for r in indexed_file.byte_records().take(end - start) {
wtr.write_byte_record(&r?)?;

let total_rows = util::count_rows(&self.rconfig())? as usize;
if self.flag_invert {
// Get records before start
indexed_file.seek(0)?;
for r in indexed_file.byte_records().take(start) {
wtr.write_byte_record(&r?)?;
}

// Get records after end
indexed_file.seek(end as u64)?;
for r in indexed_file.byte_records().take(total_rows - end) {
wtr.write_byte_record(&r?)?;
}
} else {
indexed_file.seek(start as u64)?;
for r in indexed_file.byte_records().take(end - start) {
wtr.write_byte_record(&r?)?;
}
}
Ok(wtr.flush()?)
}
Expand Down
153 changes: 153 additions & 0 deletions tests/test_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,156 @@ fn slice_neg_index_withindex() {
fn slice_neg_index_no_headers_withindex() {
test_index("slice_neg_index_no_headers_withindex", -2, "d", false, true);
}

/// Drives `slice --invert` through the shared `setup` harness and asserts on its output.
///
/// * `start` - forwarded as `--start` when present.
/// * `end` - forwarded as `--end`, or turned into a `--len` value when `as_len` is set
///   (`end` itself for a negative start, otherwise `end - start`).
/// * `expected` - the single-field records the command must emit, in order.
/// * `headers` - handed to `setup`; when false, `--no-headers` is added to the command.
/// * `use_index` - handed to `setup` (presumably makes it build an index; confirm there).
/// * `json_output` - adds `--json --output <output.json>` and compares the written JSON
///   instead of the CSV read from stdout.
fn test_slice_invert(
    name: &str,
    start: Option<isize>,
    end: Option<usize>,
    expected: &[&str],
    headers: bool,
    use_index: bool,
    as_len: bool,
    json_output: bool,
) {
    let (wrk, mut cmd) = setup(name, headers, use_index);

    if let Some(first) = start {
        cmd.arg("--start").arg(&first.to_string());
    }

    match (end, as_len) {
        (Some(stop), true) => {
            // A negative start counts from the end of the data, so `end` is used as the
            // length directly; otherwise the length is the distance from start to end.
            let origin = start.unwrap_or(0);
            let len = if origin < 0 {
                stop
            } else {
                stop - origin.unsigned_abs()
            };
            cmd.arg("--len").arg(&len.to_string());
        }
        (Some(stop), false) => {
            cmd.arg("--end").arg(&stop.to_string());
        }
        (None, _) => {}
    }

    if !headers {
        cmd.arg("--no-headers");
    }
    cmd.arg("--invert");

    if json_output {
        let output_file = wrk.path("output.json").to_string_lossy().to_string();

        cmd.arg("--json").args(&["--output", &output_file]);

        wrk.assert_success(&mut cmd);

        // Parse and re-serialize so the comparison is made against serde_json's own
        // rendering of the value rather than the raw file contents.
        let raw = wrk.read_to_string(&output_file);
        let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();
        let got = parsed.to_string();

        // Each expected record becomes a one-key object: keyed by "header" when the
        // data has headers, by the column index "0" when it does not.
        let mut objects: Vec<String> = Vec::with_capacity(expected.len());
        for &value in expected {
            objects.push(if headers {
                format!("{{\"header\":\"{}\"}}", value)
            } else {
                format!("{{\"0\":\"{}\"}}", value)
            });
        }
        let want = format!("[{}]", objects.join(","));

        assert_eq!(got, want);
        return;
    }

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // The header row, when present, precedes the expected single-field records.
    let want: Vec<Vec<String>> = headers
        .then(|| svec!["header"])
        .into_iter()
        .chain(expected.iter().map(|&value| vec![value.to_owned()]))
        .collect();

    assert_eq!(got, want);
}

/// Inverting the range `--start 0 --end 1` must leave records `b` through `e`.
#[test]
fn slice_invert_simple() {
    let (start, end) = (Some(0), Some(1));
    let remaining = ["b", "c", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, false, false);

    test_slice_invert(
        "slice_invert_simple",
        start,
        end,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Inverting the range `--start 1 --end 3` must leave records `a`, `d` and `e`.
#[test]
fn slice_invert_middle() {
    let (start, end) = (Some(1), Some(3));
    let remaining = ["a", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, false, false);

    test_slice_invert(
        "slice_invert_middle",
        start,
        end,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Same range as the "middle" case (`--start 1 --end 3`), but with `use_index` enabled
/// in the harness; the surviving records must still be `a`, `d` and `e`.
#[test]
fn slice_invert_with_index() {
    let (start, end) = (Some(1), Some(3));
    let remaining = ["a", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, true, false, false);

    test_slice_invert(
        "slice_invert_with_index",
        start,
        end,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Inverting `--start 1 --end 3` with JSON output must yield objects for `a`, `d` and `e`.
#[test]
fn slice_invert_json() {
    let (start, end) = (Some(1), Some(3));
    let remaining = ["a", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, false, true);

    test_slice_invert(
        "slice_invert_json",
        start,
        end,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// A negative start (`--start -2`) with no end, inverted, must leave records `a`, `b`
/// and `c`.
#[test]
fn slice_invert_negative() {
    let (start, end) = (Some(-2), None);
    let remaining = ["a", "b", "c"];
    let (headers, use_index, as_len, json_output) = (true, false, false, false);

    test_slice_invert(
        "slice_invert_negative",
        start,
        end,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// With `as_len` set, start 1 and end 2 are passed as `--start 1 --len 1`; inverting
/// that must leave records `a`, `c`, `d` and `e`.
#[test]
fn slice_invert_with_len() {
    let (start, end) = (Some(1), Some(2));
    let remaining = ["a", "c", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, true, false);

    test_slice_invert(
        "slice_invert_with_len",
        start,
        end,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}
Loading