From 1cade5569f9e26fa43e3f7fc6c13427c577b214d Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 10 Dec 2023 09:32:16 -0500 Subject: [PATCH 1/2] `deps`: use our patched version of dynfmt crate --- Cargo.lock | 16 ++++++++++++---- Cargo.toml | 3 +++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b5a4f9ef..1e9345d95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1528,10 +1528,9 @@ checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" [[package]] name = "dynfmt" version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c298552016db86f0d49e5de09818dd86c536f66095013cc415f4f85744033f" +source = "git+https://github.com/jqnatividad/dynfmt?branch=2021-clippy_ptr_as_ptr-bumpdeps#0dec5eaceba58e294283d8ebc6333b421220c186" dependencies = [ - "erased-serde", + "erased-serde 0.4.0", "lazy_static", "regex", "serde", @@ -1617,6 +1616,15 @@ dependencies = [ "serde", ] +[[package]] +name = "erased-serde" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3286168faae03a0e583f6fde17c02c8b8bba2dcc2061d0f7817066e5b0af706" +dependencies = [ + "serde", +] + [[package]] name = "errno" version = "0.3.8" @@ -2899,7 +2907,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c81f8ac20188feb5461a73eabb22a34dd09d6d58513535eb587e46bff6ba250" dependencies = [ "bstr", - "erased-serde", + "erased-serde 0.3.31", "libloading 0.8.1", "mlua-sys", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index dffa49114..f6735cdc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -230,6 +230,9 @@ quickcheck = { version = "1", default-features = false } rusqlite = { version = "0.29", features = ["bundled"] } serial_test = { version = "2.0", features = ["file_locks"] } +[patch.crates-io] +dynfmt = { git = "https://github.com/jqnatividad/dynfmt", branch = "2021-clippy_ptr_as_ptr-bumpdeps"} + [features] default = ["mimalloc"] all_features = [ From 536e2f74acbd632e129b23553ff1b3266ac843c1 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 10 Dec 2023 09:34:40 -0500 Subject: [PATCH 2/2] `apply` & `applydp`: refactor dynfmt & calconv prep - do not write new_column until after validation - more efficient initialization of dynfmt_template, moving it behind if guard --- src/cmd/apply.rs | 34 ++++++++++++++++++++-------------- src/cmd/applydp.rs | 34 ++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/cmd/apply.rs b/src/cmd/apply.rs index 9c9031071..19fd6ba8d 100644 --- a/src/cmd/apply.rs +++ b/src/cmd/apply.rs @@ -513,42 +513,41 @@ pub fn run(argv: &[&str]) -> CliResult<()> { } } - if !rconfig.no_headers { - if let Some(new_column) = &args.flag_new_column { - headers.push_field(new_column); - } - wtr.write_record(&headers)?; - } - // for dynfmt, safe_headers are the "safe" version of colnames - alphanumeric only, // all other chars replaced with underscore // dynfmt_fields are the columns used in the dynfmt --formatstr option // we prep it so we only populate the lookup vec with the index of these columns // so SimpleCurlyFormat is performant - let mut dynfmt_fields = Vec::with_capacity(10); // 10 is a reasonable default to save allocs - let mut dynfmt_template = args.flag_formatstr.clone(); - if args.cmd_dynfmt || args.cmd_calcconv { + let dynfmt_template = if args.cmd_dynfmt || args.cmd_calcconv { if args.flag_no_headers { return fail_incorrectusage_clierror!("dynfmt/calcconv subcommand requires headers."); } + + let mut dynfmt_template_wrk = args.flag_formatstr.clone(); + let mut dynfmt_fields = Vec::new(); + // first, get the fields used in the dynfmt template - let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); let formatstr_re: &'static Regex = crate::regex_oncelock!(r"\{(?P\w+)?\}"); for format_fields in formatstr_re.captures_iter(&args.flag_formatstr) { dynfmt_fields.push(format_fields.name("key").unwrap().as_str()); } // we sort the fields so we can do binary_search dynfmt_fields.sort_unstable(); + // now, get the indices of the columns for the lookup vec + let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); for (i, field) in safe_headers.iter().enumerate() { if dynfmt_fields.binary_search(&field.as_str()).is_ok() { let field_with_curly = format!("{{{field}}}"); let field_index = format!("{{{i}}}"); - dynfmt_template = dynfmt_template.replace(&field_with_curly, &field_index); + dynfmt_template_wrk = dynfmt_template_wrk.replace(&field_with_curly, &field_index); } } - debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template}"); - } + debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template_wrk}"); + dynfmt_template_wrk + } else { + String::new() + }; let mut ops_vec: Vec = Vec::new(); @@ -576,6 +575,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail_incorrectusage_clierror!("Unknown apply subcommand."); }; + if !rconfig.no_headers { + if let Some(new_column) = &args.flag_new_column { + headers.push_field(new_column); + } + wtr.write_record(&headers)?; + } + // prep progress bar let show_progress = (args.flag_progressbar || util::get_envvar_flag("QSV_PROGRESSBAR")) && !rconfig.is_stdin(); diff --git a/src/cmd/applydp.rs b/src/cmd/applydp.rs index 90356b873..9580f9817 100644 --- a/src/cmd/applydp.rs +++ b/src/cmd/applydp.rs @@ -346,42 +346,41 @@ pub fn run(argv: &[&str]) -> CliResult<()> { } } - if !rconfig.no_headers { - if let Some(new_column) = &args.flag_new_column { - headers.push_field(new_column); - } - wtr.write_record(&headers)?; - } - // for dynfmt, safe_headers are the "safe" version of colnames - alphanumeric only, // all other chars replaced with underscore // dynfmt_fields are the columns used in the dynfmt --formatstr option // we prep it so we only populate the lookup vec with the index of these columns // so SimpleCurlyFormat is performant - let mut dynfmt_fields = Vec::with_capacity(10); // 10 is a reasonable default to save allocs - let mut dynfmt_template = args.flag_formatstr.clone(); - if args.cmd_dynfmt { + let dynfmt_template = if args.cmd_dynfmt || args.cmd_calcconv { if args.flag_no_headers { return fail_incorrectusage_clierror!("dynfmt/calcconv subcommand requires headers."); } + + let mut dynfmt_template_wrk = args.flag_formatstr.clone(); + let mut dynfmt_fields = Vec::new(); + // first, get the fields used in the dynfmt template - let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); let formatstr_re: &'static Regex = crate::regex_oncelock!(r"\{(?P\w+)?\}"); for format_fields in formatstr_re.captures_iter(&args.flag_formatstr) { dynfmt_fields.push(format_fields.name("key").unwrap().as_str()); } // we sort the fields so we can do binary_search dynfmt_fields.sort_unstable(); + // now, get the indices of the columns for the lookup vec + let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); for (i, field) in safe_headers.iter().enumerate() { if dynfmt_fields.binary_search(&field.as_str()).is_ok() { let field_with_curly = format!("{{{field}}}"); let field_index = format!("{{{i}}}"); - dynfmt_template = dynfmt_template.replace(&field_with_curly, &field_index); + dynfmt_template_wrk = dynfmt_template_wrk.replace(&field_with_curly, &field_index); } } - debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template}"); - } + debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template_wrk}"); + dynfmt_template_wrk + } else { + String::new() + }; enum ApplydpSubCmd { Operations, @@ -414,6 +413,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail!("Unknown applydp subcommand."); }; + if !rconfig.no_headers { + if let Some(new_column) = &args.flag_new_column { + headers.push_field(new_column); + } + wtr.write_record(&headers)?; + } + let prefer_dmy = args.flag_prefer_dmy || rconfig.get_dmy_preference(); // amortize memory allocation by reusing record