Skip to content

Commit

Permalink
Merge pull request #2295 from jqnatividad/python-loadexpression_fromfile
Browse files Browse the repository at this point in the history
`py`: add ability to load python expression from file
  • Loading branch information
jqnatividad authored Nov 17, 2024
2 parents 0cf645f + 8e2c92f commit d14bd04
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 24 deletions.
71 changes: 47 additions & 24 deletions src/cmd/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,22 @@ Some usage examples:
For more extensive examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_py.rs.
Usage:
qsv py map [options] -n <script> [<input>]
qsv py map [options] <new-column> <script> [<input>]
qsv py map --helper <file> [options] <new-column> <script> [<input>]
qsv py filter [options] <script> [<input>]
qsv py map [options] -n <expression> [<input>]
qsv py map [options] <new-column> <expression> [<input>]
qsv py map --helper <file> [options] <new-column> <expression> [<input>]
qsv py filter [options] <expression> [<input>]
qsv py map --help
qsv py filter --help
qsv py --help
py argument:
<expression> Can either be a python expression, or if it starts with
"file:" or ends with ".py" - the filepath from which to
load the python expression.
Note that this argument expects a SINGLE expression, and not
a full-blown python script. Use the --helper option
to load helper code that you can call from the expression.
py options:
-f, --helper <file> File containing Python code that's loaded into the
qsv_uh Python module. Functions with a return statement
Expand All @@ -117,7 +125,6 @@ Common options:
use std::{ffi::CString, fs};

use indicatif::{ProgressBar, ProgressDrawTarget};
use log::{log_enabled, Level::Debug};
use pyo3::{
intern,
prelude::*,
Expand Down Expand Up @@ -164,7 +171,7 @@ struct Args {
cmd_map: bool,
cmd_filter: bool,
arg_new_column: Option<String>,
arg_script: String,
arg_expression: String,
flag_batch: usize,
flag_helper: Option<String>,
arg_input: Option<String>,
Expand All @@ -189,13 +196,32 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let mut rdr = rconfig.reader()?;
let mut wtr = Config::new(args.flag_output.as_ref()).writer()?;

if log_enabled!(Debug) {
let debug_flag = log::log_enabled!(log::Level::Debug);

if debug_flag {
Python::with_gil(|py| {
let msg = format!("Detected python={}", py.version());
winfo!("{msg}");
});
}

let expression = if let Some(expression_filepath) = args.arg_expression.strip_prefix("file:") {
match fs::read_to_string(expression_filepath) {
Ok(file_contents) => file_contents,
Err(e) => return fail_clierror!("Cannot load Python expression from file: {e}"),
}
} else if std::path::Path::new(&args.arg_expression)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
{
match fs::read_to_string(args.arg_expression.clone()) {
Ok(file_contents) => file_contents,
Err(e) => return fail_clierror!("Cannot load .py file: {e}"),
}
} else {
args.arg_expression.clone()
};

let mut helper_text = String::new();
if let Some(helper_file) = args.flag_helper {
helper_text = match fs::read_to_string(helper_file) {
Expand Down Expand Up @@ -264,11 +290,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let user_helpers_filename = CString::new("qsv_user_helpers.py").unwrap();
let user_helpers_module_name = CString::new("qsv_uh").unwrap();

let arg_script = CString::new(args.arg_script)
.map_err(|e| format!("Failed to create CString from script: {e}"))?;
let arg_expression = CString::new(expression)
.map_err(|e| format!("Failed to create CString from expression: {e}"))?;

let mut row_number = 0_u64;
let debug_flag = log::log_enabled!(Debug);

// main loop to read CSV and construct batches.
// we batch python operations so that the GILPool does not get very large
Expand Down Expand Up @@ -359,20 +384,18 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

py_row.call_method1(intern!(py, "_update_underlying_data"), (row_data,))?;

let result = py
.eval(&arg_script, Some(&batch_globals), Some(&batch_locals))
.map_err(|e| {
e.print_and_set_sys_last_vars(py);
error_count += 1;
if debug_flag {
log::error!("{e:?}");
}
format!(
"Evaluation of given expression in row {row_number} failed with the \
above error!"
)
})
.unwrap_or_else(|_| error_result.clone().into_any());
let result =
match py.eval(&arg_expression, Some(&batch_globals), Some(&batch_locals)) {
Ok(r) => r,
Err(e) => {
error_count += 1;
if debug_flag {
log::error!("Expression error:{row_number}-{e:?}");
}
e.print_and_set_sys_last_vars(py);
error_result.clone().into_any()
},
};

if args.cmd_map {
let result = helpers
Expand Down
91 changes: 91 additions & 0 deletions tests/test_py.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,97 @@ def celsius_to_fahrenheit(celsius):
assert_eq!(got, expected);
}

// Checks that `py map` can read its expression from a file while also using `--helper`.
// Two ways of naming the expression file are exercised:
//   1. a path ending in ".py" (testfile.py), and
//   2. a path given with the "file:" prefix (file:testfile2.code).
#[test]
fn py_map_userhelper_and_loadfile() {
let wrk = Workdir::new("py");
// Input CSV. The last row carries a non-numeric "number" value ("fib of 8") so the
// helpers' `except ValueError` branches are hit.
wrk.create(
"data.csv",
vec![
svec!["letter", "number"],
svec!["a", "1"],
svec!["b", "2"],
svec!["c", "6"],
svec!["d", "7"],
svec!["e", "fib of 8"],
],
);

// Helper file passed via --helper; the expressions below call its functions
// through the `qsv_uh` module name.
wrk.create_from_string(
"user_helper.py",
r#"
def fibonacci(input):
try:
float(input)
except ValueError:
return "incorrect input - not a number"
sinput = str(input)
if not float(sinput).is_integer():
return "incorrect input - not a whole number"
n = int(sinput)
if n < 0:
return "incorrect input - negative number"
elif n == 0:
return 0
elif n == 1 or n == 2:
return 1
else:
return fibonacci(n-1) + fibonacci(n-2)
def celsius_to_fahrenheit(celsius):
try:
float(celsius)
except ValueError:
return "incorrect input - not a float"
fahrenheit = (float(celsius) * 9/5) + 32
return f'{fahrenheit:.1f}'
"#,
);

// Case 1: the expression lives in a file whose name ends with ".py".
wrk.create_from_string("testfile.py", "qsv_uh.fibonacci(number)");

let mut cmd = wrk.command("py");
cmd.arg("map")
.arg("--helper")
.arg("user_helper.py")
.arg("fib")
.arg("testfile.py")
.arg("data.csv");

let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
// A new "fib" column is appended; the non-numeric row gets the helper's error string.
let expected = vec![
svec!["letter", "number", "fib"],
svec!["a", "1", "1"],
svec!["b", "2", "1"],
svec!["c", "6", "8"],
svec!["d", "7", "13"],
svec!["e", "fib of 8", "incorrect input - not a number"],
];
assert_eq!(got, expected);

// Case 2: the file does not end with ".py", so it is referenced with the "file:" prefix.
wrk.create_from_string("testfile2.code", "qsv_uh.celsius_to_fahrenheit(number)");

let mut cmd = wrk.command("py");
cmd.arg("map")
.arg("--helper")
.arg("user_helper.py")
.arg("fahrenheit")
.arg("file:testfile2.code")
.arg("data.csv");

let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
// The helper formats its result with one decimal place (`:.1f`), hence "33.8" etc.
let expected = vec![
svec!["letter", "number", "fahrenheit"],
svec!["a", "1", "33.8"],
svec!["b", "2", "35.6"],
svec!["c", "6", "42.8"],
svec!["d", "7", "44.6"],
svec!["e", "fib of 8", "incorrect input - not a float"],
];
assert_eq!(got, expected);
}

#[test]
fn py_map_col_positional() {
let wrk = Workdir::new("py");
Expand Down

0 comments on commit d14bd04

Please sign in to comment.