From 536e2f74acbd632e129b23553ff1b3266ac843c1 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 10 Dec 2023 09:34:40 -0500 Subject: [PATCH] `apply` & `applydp`: refactor dynfmt & calcconv prep - do not write new_column until after validation - more efficient initialization of dynfmt_template, moving it behind if guard --- src/cmd/apply.rs | 34 ++++++++++++++++++++-------------- src/cmd/applydp.rs | 34 ++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/cmd/apply.rs b/src/cmd/apply.rs index 9c9031071..19fd6ba8d 100644 --- a/src/cmd/apply.rs +++ b/src/cmd/apply.rs @@ -513,42 +513,41 @@ pub fn run(argv: &[&str]) -> CliResult<()> { } } - if !rconfig.no_headers { - if let Some(new_column) = &args.flag_new_column { - headers.push_field(new_column); - } - wtr.write_record(&headers)?; - } - // for dynfmt, safe_headers are the "safe" version of colnames - alphanumeric only, // all other chars replaced with underscore // dynfmt_fields are the columns used in the dynfmt --formatstr option // we prep it so we only populate the lookup vec with the index of these columns // so SimpleCurlyFormat is performant - let mut dynfmt_fields = Vec::with_capacity(10); // 10 is a reasonable default to save allocs - let mut dynfmt_template = args.flag_formatstr.clone(); - if args.cmd_dynfmt || args.cmd_calcconv { + let dynfmt_template = if args.cmd_dynfmt || args.cmd_calcconv { if args.flag_no_headers { return fail_incorrectusage_clierror!("dynfmt/calcconv subcommand requires headers."); } + + let mut dynfmt_template_wrk = args.flag_formatstr.clone(); + let mut dynfmt_fields = Vec::new(); + // first, get the fields used in the dynfmt template - let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); let formatstr_re: &'static Regex = crate::regex_oncelock!(r"\{(?P<key>\w+)?\}"); for format_fields in formatstr_re.captures_iter(&args.flag_formatstr) { 
dynfmt_fields.push(format_fields.name("key").unwrap().as_str()); } // we sort the fields so we can do binary_search dynfmt_fields.sort_unstable(); + // now, get the indices of the columns for the lookup vec + let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); for (i, field) in safe_headers.iter().enumerate() { if dynfmt_fields.binary_search(&field.as_str()).is_ok() { let field_with_curly = format!("{{{field}}}"); let field_index = format!("{{{i}}}"); - dynfmt_template = dynfmt_template.replace(&field_with_curly, &field_index); + dynfmt_template_wrk = dynfmt_template_wrk.replace(&field_with_curly, &field_index); } } - debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template}"); - } + debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template_wrk}"); + dynfmt_template_wrk + } else { + String::new() + }; let mut ops_vec: Vec = Vec::new(); @@ -576,6 +575,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail_incorrectusage_clierror!("Unknown apply subcommand."); }; + if !rconfig.no_headers { + if let Some(new_column) = &args.flag_new_column { + headers.push_field(new_column); + } + wtr.write_record(&headers)?; + } + // prep progress bar let show_progress = (args.flag_progressbar || util::get_envvar_flag("QSV_PROGRESSBAR")) && !rconfig.is_stdin(); diff --git a/src/cmd/applydp.rs b/src/cmd/applydp.rs index 90356b873..9580f9817 100644 --- a/src/cmd/applydp.rs +++ b/src/cmd/applydp.rs @@ -346,42 +346,41 @@ pub fn run(argv: &[&str]) -> CliResult<()> { } } - if !rconfig.no_headers { - if let Some(new_column) = &args.flag_new_column { - headers.push_field(new_column); - } - wtr.write_record(&headers)?; - } - // for dynfmt, safe_headers are the "safe" version of colnames - alphanumeric only, // all other chars replaced with underscore // dynfmt_fields are the columns used in the dynfmt --formatstr option // we prep it so we only populate the lookup vec with the index of these columns // so 
SimpleCurlyFormat is performant - let mut dynfmt_fields = Vec::with_capacity(10); // 10 is a reasonable default to save allocs - let mut dynfmt_template = args.flag_formatstr.clone(); - if args.cmd_dynfmt { + let dynfmt_template = if args.cmd_dynfmt || args.cmd_calcconv { if args.flag_no_headers { return fail_incorrectusage_clierror!("dynfmt/calcconv subcommand requires headers."); } + + let mut dynfmt_template_wrk = args.flag_formatstr.clone(); + let mut dynfmt_fields = Vec::new(); + // first, get the fields used in the dynfmt template - let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); let formatstr_re: &'static Regex = crate::regex_oncelock!(r"\{(?P<key>\w+)?\}"); for format_fields in formatstr_re.captures_iter(&args.flag_formatstr) { dynfmt_fields.push(format_fields.name("key").unwrap().as_str()); } // we sort the fields so we can do binary_search dynfmt_fields.sort_unstable(); + // now, get the indices of the columns for the lookup vec + let (safe_headers, _) = util::safe_header_names(&headers, false, false, None, "", true); for (i, field) in safe_headers.iter().enumerate() { if dynfmt_fields.binary_search(&field.as_str()).is_ok() { let field_with_curly = format!("{{{field}}}"); let field_index = format!("{{{i}}}"); - dynfmt_template = dynfmt_template.replace(&field_with_curly, &field_index); + dynfmt_template_wrk = dynfmt_template_wrk.replace(&field_with_curly, &field_index); } } - debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template}"); - } + debug!("dynfmt_fields: {dynfmt_fields:?} dynfmt_template: {dynfmt_template_wrk}"); + dynfmt_template_wrk + } else { + String::new() + }; enum ApplydpSubCmd { Operations, @@ -414,6 +413,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail!("Unknown applydp subcommand."); }; + if !rconfig.no_headers { + if let Some(new_column) = &args.flag_new_column { + headers.push_field(new_column); + } + wtr.write_record(&headers)?; + } + let prefer_dmy = 
args.flag_prefer_dmy || rconfig.get_dmy_preference(); // amortize memory allocation by reusing record