From 0650efb5633e0d951f771532726c5e7f0c4b5680 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:02:15 -0500 Subject: [PATCH 1/3] `deps`: add minijinja-contrib for additional filters/functions and enhanced Jinja2 python compatibility --- Cargo.lock | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 8 ++++++++ 2 files changed, 56 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 775d1e5c3..5d7d05346 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1903,6 +1903,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ "powerfmt", + "serde", ] [[package]] @@ -3755,6 +3756,19 @@ dependencies = [ "v_htmlescape", ] +[[package]] +name = "minijinja-contrib" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ffd46ee854be23604a20efd6c9655374fefbe4d44b949dc0f907305d92873a" +dependencies = [ + "minijinja", + "rand", + "serde", + "time", + "time-tz", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -5297,6 +5311,7 @@ dependencies = [ "log", "mimalloc", "minijinja", + "minijinja-contrib", "mlua", "newline-converter", "num_cpus", @@ -6202,6 +6217,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde-xml-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65162e9059be2f6a3421ebbb4fef3e74b7d9e7c60c50a0e292c6239f19f1edfa" +dependencies = [ + "log", + "serde", + "thiserror", + "xml-rs", +] + [[package]] name = "serde_derive" version = "1.0.214" @@ -6819,6 +6846,21 @@ dependencies = [ "time-core", ] +[[package]] +name = "time-tz" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a422f65dfdf08a81317d54fa00b45dc58cbccab69be78c1447391cc39ae8c9d4" +dependencies = [ + "cfg-if", + "parse-zoneinfo", + "phf", + "phf_codegen", + "serde", + "serde-xml-rs", + "time", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -7967,6 +8009,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "xml-rs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" + [[package]] name = "xxhash-rust" version = "0.8.12" diff --git a/Cargo.toml b/Cargo.toml index 538719a2b..7eb28713e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -146,6 +146,14 @@ minijinja = { version = "2", features = [ "speedups", "urlencode", ] } +minijinja-contrib = { version = "2", features = [ + "datetime", + "pycompat", + "rand", + "time", + "time-tz", + "timezone", +] } mlua = { version = "0.10", features = [ "luau", "luau-jit", From 584ed30ae5378618c7ceeec25fd22effadc4208c Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:02:58 -0500 Subject: [PATCH 2/3] `feat`: add additional template filters & functions with minijinja-contrib --- src/cmd/template.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cmd/template.rs b/src/cmd/template.rs index 5148c8bf9..43847b767 100644 --- a/src/cmd/template.rs +++ b/src/cmd/template.rs @@ -4,8 +4,8 @@ https://docs.rs/minijinja/latest/minijinja/ Each CSV row is used to populate the template, with column headers used as variable names. Non-alphanumeric characters in column headers are replaced with an underscore ("_"). -The template syntax follows the Jinja2 template language with additional custom filters -(see bottom of file). +The template syntax follows the Jinja2 template language with additional custom functions/filters +from minijinja_contrib and custom filters defined in this command (see bottom of source). Example: data.csv: @@ -64,6 +64,7 @@ use std::{ }; use minijinja::Environment; +use minijinja_contrib::pycompat::unknown_method_callback; use rayon::{ iter::{IndexedParallelIterator, ParallelIterator}, prelude::IntoParallelRefIterator, @@ -129,7 +130,12 @@ pub fn run(argv: &[&str]) -> CliResult<()> { // Set up minijinja environment let mut env = Environment::new(); - // Add custom filters + // Add minijinja_contrib functions/filters + // see https://docs.rs/minijinja-contrib/latest/minijinja_contrib/ + minijinja_contrib::add_to_environment(&mut env); + env.set_unknown_method_callback(unknown_method_callback); + + // Add our own custom filters env.add_filter("substr", substr); env.add_filter("format_float", format_float); env.add_filter("human_count", human_count); @@ -175,6 +181,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> { // Create filename environment once if needed let filename_env = if output_to_dir && args.flag_outfilename != QSV_ROWNO { let mut env = Environment::new(); + minijinja_contrib::add_to_environment(&mut env); + env.set_unknown_method_callback(unknown_method_callback); env.add_template("filename", &args.flag_outfilename)?; Some(env) } else { From 7a05c516667c0b1c7a168f4c1d24284f41622082 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:03:28 -0500 Subject: [PATCH 3/3] `tests`: add `template` tests exercising minijinja-contrib filters and functions --- tests/test_template.rs | 150 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/tests/test_template.rs b/tests/test_template.rs index 3fc1cbeee..d6bfeb03a 100644 --- a/tests/test_template.rs +++ b/tests/test_template.rs @@ -500,3 +500,153 @@ fn template_filter_error() { let expected = "Alice: \nBob: 123.45"; assert_eq!(got, expected); } + +#[test] +fn template_contrib_filters() { + let wrk = Workdir::new("template_contrib_filters"); + wrk.create( + "data.csv", + vec![ + svec!["text", "num", "datalist", "url"], + svec![ + "hello WORLD", + "12345.6789", + "a,b,c", + "https://example.com/path?q=test&lang=en" + ], + svec![ + "Testing 123", + "-98765.4321", + "1,2,3", + "http://localhost:8080/api" + ], + ], + ); + + // Test various minijinja_contrib filters + let mut cmd = wrk.command("template"); + cmd.arg("--template") + .arg(concat!( + // String filters + "capitalize: {{text|capitalize}}\n", + "title: {{text|title}}\n", + "upper: {{text|upper}}\n", + "lower: {{text|lower}}\n", + // URL encode + "urlencode: {{text|urlencode}}\n", + // List filters + "split: {{datalist|split(',')|join('|')}}\n", + "first: {{datalist|split(',')|first}}\n", + "last: {{datalist|split(',')|last}}\n", + // Add newline between records + "\n" + )) + .arg("data.csv"); + + let got: String = wrk.stdout(&mut cmd); + let expected = concat!( + "capitalize: Hello world\n", + "title: Hello World\n", + "upper: HELLO WORLD\n", + "lower: hello world\n", + "urlencode: hello%20WORLD\n", + "split: a|b|c\n", + "first: a\n", + "last: c\n", + "capitalize: Testing 123\n", + "title: Testing 123\n", + "upper: TESTING 123\n", + "lower: testing 123\n", + "urlencode: Testing%20123\n", + "split: 1|2|3\n", + "first: 1\n", + "last: 3", + ); + assert_eq!(got, expected); +} + +#[test] +fn template_contrib_functions() { + let wrk = Workdir::new("template_contrib_functions"); + wrk.create( + "data.csv", + vec![ + svec!["num_messages", "date_col"], + svec!["1", "2023-06-24T16:37:22+00:00"], + svec!["2", "1999-12-24T16:37:22+12:00"], + ], + ); + + // Test various minijinja_contrib functions + let mut cmd = wrk.command("template"); + cmd.arg("--template") + .arg(concat!( + "pluralize: You have {{ num_messages }} message{{ num_messages|int|pluralize }}\n", + "now: {{now()|datetimeformat|length > 2}}\n", // Just verify we get a non-empty string + "dtformat: {{date_col|datetimeformat(format=\"long\", tz=\"EST\")}}\n", + "\n\n" + )) + .arg("data.csv"); + + let got: String = wrk.stdout(&mut cmd); + let expected = concat!( + "pluralize: You have 1 message\n", + "now: true\n", + "dtformat: June 24 2023 11:37:22\n", + "\n", + "pluralize: You have 2 messages\n", + "now: true\n", + "dtformat: December 23 1999 23:37:22", + ); + assert_eq!(got, expected); +} + +#[test] +fn template_pycompat_filters() { + let wrk = Workdir::new("template_pycompat_filters"); + wrk.create( + "data.csv", + vec![ + svec!["text", "num", "mixed"], + svec!["Hello World!", "123", "ABC123xyz "], + svec!["TESTING", "abc", " Hello "], + ], + ); + + let mut cmd = wrk.command("template"); + cmd.arg("--template") + .arg(concat!( + // Test string methods from Python compatibility + "isascii: {{text.isascii()}}\n", + "isdigit: {{num.isdigit()}}\n", + "startswith: {{text.startswith('Hello')}}\n", + "isnumeric: {{num.isnumeric()}}\n", + "isupper: {{text.isupper()}}\n", + "replace: {{mixed.replace('ABC', 'XYZ')}}\n", + "rfind: {{mixed.rfind('xyz')}}\n", + "rstrip: {{mixed.rstrip()}}\n", + "\n" + )) + .arg("data.csv"); + + let got: String = wrk.stdout(&mut cmd); + let expected = concat!( + "isascii: true\n", + "isdigit: true\n", + "startswith: true\n", + "isnumeric: true\n", + "isupper: false\n", + "replace: XYZ123xyz \n", + "rfind: 6\n", + "rstrip: ABC123xyz\n", + "isascii: true\n", + "isdigit: false\n", + "startswith: false\n", + "isnumeric: false\n", + "isupper: true\n", + "replace: Hello \n", + "rfind: -1\n", + "rstrip: Hello", + ); + assert_eq!(got, expected); +}