From bb49f9e129b9ccd513087e964c056aa53f186356 Mon Sep 17 00:00:00 2001 From: Ben Ruijl Date: Fri, 16 Aug 2024 12:00:11 +0200 Subject: [PATCH] Add new expression evaluator to Python API - Fix ASM output for repeated register occurrences --- examples/nested_evaluation.rs | 68 +---- src/api/python.rs | 458 +++++++++++++++++++++++++++++----- src/evaluate.rs | 135 +++++++--- src/id.rs | 20 +- symbolica.pyi | 118 ++++++++- 5 files changed, 627 insertions(+), 172 deletions(-) diff --git a/examples/nested_evaluation.rs b/examples/nested_evaluation.rs index 488341d..59a2d36 100644 --- a/examples/nested_evaluation.rs +++ b/examples/nested_evaluation.rs @@ -1,9 +1,7 @@ -use std::time::Instant; - use symbolica::{ - atom::{Atom, AtomView}, - domains::{float::Complex, rational::Rational}, - evaluate::{CompileOptions, ExpressionEvaluator, FunctionMap, InlineASM, OptimizationSettings}, + atom::Atom, + domains::rational::Rational, + evaluate::{CompileOptions, FunctionMap, InlineASM, OptimizationSettings}, state::State, }; @@ -73,64 +71,20 @@ fn main() { OptimizationSettings::default(), ) .unwrap(); + let mut e_f64 = evaluator.map_coeff(&|x| x.into()); - let r = e_f64.evaluate(&[5.]); + let r = e_f64.evaluate_single(&[5.]); println!("{}", r); - let mut tree = - AtomView::to_eval_tree_multiple(&[e1.as_view(), e2.as_view()], &fn_map, ¶ms).unwrap(); - - // optimize the tree using an occurrence-order Horner scheme - println!("Op original {:?}", tree.count_operations()); - tree.horner_scheme(); - println!("Op horner {:?}", tree.count_operations()); - tree.common_subexpression_elimination(); - println!("op cse {:?}", tree.count_operations()); - - //tree.common_pair_elimination(); - //println!("op cpe {:?}", tree.count_operations()); - - let mut evaluator: ExpressionEvaluator = tree.linearize(None).map_coeff(&|x| x.into()); - - let ce = evaluator - .export_cpp( - "nested_evaluation.cpp", - "evaltest", - true, - InlineASM::default(), - ) + let mut compiled = e_f64 + .export_cpp("nested_evaluate.cpp", "nested", true, InlineASM::Intel) .unwrap() - .compile("libneval.so", CompileOptions::default()) + .compile("nested", CompileOptions::default()) .unwrap() .load() .unwrap(); - let params = vec![5.]; - let mut out = vec![0., 0.]; - ce.evaluate(¶ms, &mut out); - println!("Eval from C++: {}, {}", out[0], out[1]); - - { - let params = vec![Complex::new(5., 0.)]; - let mut out = vec![Complex::new_zero(), Complex::new_zero()]; - ce.evaluate(¶ms, &mut out); - println!("Eval from C++: {}, {}", out[0], out[1]); - } - - // benchmark - let t = Instant::now(); - for _ in 0..1000000 { - let _ = ce.evaluate(¶ms, &mut out); - } - println!("C++ time {:#?}", t.elapsed()); - - evaluator.evaluate_multiple(¶ms, &mut out); - println!("Eval: {}, {}", out[0], out[1]); - - let params = vec![5.]; - let t = Instant::now(); - for _ in 0..1000000 { - evaluator.evaluate_multiple(¶ms, &mut out); - } - println!("Eager time {:#?}", t.elapsed()); + let mut out = vec![0.]; + compiled.evaluate(&[5.], &mut out); + println!("{}", out[0]); } diff --git a/src/api/python.rs b/src/api/python.rs index 999cd7a..2db6631 100644 --- a/src/api/python.rs +++ b/src/api/python.rs @@ -38,7 +38,10 @@ use crate::{ }, Ring, }, - evaluate::EvaluationFn, + evaluate::{ + CompileOptions, CompiledEvaluator, EvaluationFn, ExpressionEvaluator, FunctionMap, + InlineASM, OptimizationSettings, + }, id::{ Condition, Match, MatchSettings, MatchStack, Pattern, PatternAtomTreeIterator, PatternRestriction, ReplaceIterator, Replacement, WildcardAndRestriction, @@ -46,15 +49,8 @@ use crate::{ numerical_integration::{ContinuousGrid, DiscreteGrid, Grid, MonteCarloRng, Sample}, parser::Token, poly::{ - evaluate::{ - InstructionEvaluator, InstructionSetMode, InstructionSetModeCPPSettings, - InstructionSetPrinter, - }, - factor::Factorize, - groebner::GroebnerBasis, - polynomial::MultivariatePolynomial, - series::Series, - GrevLexOrder, LexOrder, Variable, INLINED_EXPONENTS, + factor::Factorize, groebner::GroebnerBasis, polynomial::MultivariatePolynomial, + series::Series, GrevLexOrder, LexOrder, Variable, INLINED_EXPONENTS, }, printer::{ AtomPrinter, MatrixPrinter, PolynomialPrinter, PrintOptions, RationalPolynomialPrinter, @@ -82,7 +78,8 @@ fn symbolica(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -2354,9 +2351,7 @@ impl PythonExpression { condition: ( id, PatternRestriction::Filter(Box::new(move |m| { - let mut a = Atom::default(); - m.to_atom(&mut a); - let data: PythonExpression = a.into(); + let data: PythonExpression = m.to_atom().into(); Python::with_gil(|py| { filter_fn @@ -2518,12 +2513,8 @@ impl PythonExpression { PatternRestriction::Cmp( other_id, Box::new(move |m1, m2| { - let mut a = Atom::default(); - m1.to_atom(&mut a); - let data1: PythonExpression = a.clone().into(); - - m2.to_atom(&mut a); - let data2: PythonExpression = a.into(); + let data1: PythonExpression = m1.to_atom().into(); + let data2: PythonExpression = m2.to_atom().into(); Python::with_gil(|py| { cmp_fn @@ -3649,6 +3640,188 @@ impl PythonExpression { .evaluate(|x| x.into(), &constants, &functions, &mut cache); Ok(PyComplex::from_doubles(py, r.re, r.im)) } + + /// Create an evaluator that can evaluate (nested) expressions in an optimized fashion. + /// All constants and functions should be provided as dictionaries, where the function + /// dictionary has a key `(name, printable name, arguments)` and the value is the function + /// body. For example the function `f(x,y)=x^2+y` should be provided as + /// `{(f, "f", (x, y)): x**2 + y}`. All free parameters should be provided in the `params` list. + /// + /// Examples + /// -------- + /// >>> from symbolica import * + /// >>> x, y, z, pi, f, g = Expression.symbols( + /// >>> 'x', 'y', 'z', 'pi', 'f', 'g') + /// >>> + /// >>> e1 = Expression.parse("x + pi + cos(x) + f(g(x+1),x*2)") + /// >>> fd = Expression.parse("y^2 + z^2*y^2") + /// >>> gd = Expression.parse("y + 5") + /// >>> + /// >>> ev = e1.evaluator({pi: Expression.num(22)/7}, + /// >>> {(f, "f", (y, z)): fd, (g, "g", (y, )): gd}, [x]) + /// >>> res = ev.evaluate([[1.], [2.], [3.]]) # evaluate at x=1, x=2, x=3 + /// >>> print(res) + #[pyo3(signature = + (constants, + functions, + params, + iterations = 100, + n_cores = 4, + verbose = false), + )] + pub fn evaluator( + &self, + constants: HashMap, + functions: HashMap<(Variable, String, Vec), PythonExpression>, + params: Vec, + iterations: usize, + n_cores: usize, + verbose: bool, + ) -> PyResult { + let mut fn_map = FunctionMap::new(); + + for (k, v) in &constants { + if let Ok(r) = v.expr.clone().try_into() { + fn_map.add_constant(k.expr.as_view(), r); + } else { + Err(exceptions::PyValueError::new_err(format!( + "Constants must be rationals. If this is not possible, pass the value as a parameter", + )))? + } + } + + for ((symbol, rename, args), body) in &functions { + let symbol = symbol + .to_id() + .ok_or(exceptions::PyValueError::new_err(format!( + "Bad function name {}", + symbol + )))?; + let args: Vec<_> = args + .iter() + .map(|x| { + x.to_id().ok_or(exceptions::PyValueError::new_err(format!( + "Bad function name {}", + symbol + ))) + }) + .collect::>()?; + + fn_map + .add_function(symbol, rename.clone(), args, body.expr.as_view()) + .map_err(|e| { + exceptions::PyValueError::new_err(format!("Could not add function: {}", e)) + })?; + } + + let settings = OptimizationSettings { + horner_iterations: iterations, + n_cores, + verbose, + ..OptimizationSettings::default() + }; + + let params: Vec<_> = params.iter().map(|x| x.expr.clone()).collect(); + + let eval = self + .expr + .evaluator(&fn_map, ¶ms, settings) + .map_err(|e| { + exceptions::PyValueError::new_err(format!("Could not create evaluator: {}", e)) + })?; + + let eval_f64 = eval.map_coeff(&|x| x.to_f64()); + + Ok(PythonExpressionEvaluator { eval: eval_f64 }) + } + + /// Create an evaluator that can jointly evaluate (nested) expressions in an optimized fashion. + /// See `Expression.evaluator()` for more information. + /// + /// Examples + /// -------- + /// >>> from symbolica import * + /// >>> x = Expression.symbols('x') + /// >>> e1 = Expression.parse("x^2 + 1") + /// >>> e2 = Expression.parse("x^2 + 2) + /// >>> ev = Expression.evaluator_multiple([e1, e2], {}, {}, [x]) + /// + /// will recycle the `x^2` + #[classmethod] + #[pyo3(signature = + (exprs, + constants, + functions, + params, + iterations = 100, + n_cores = 4, + verbose = false), + )] + pub fn evaluator_multiple( + _cls: &PyType, + exprs: Vec, + constants: HashMap, + functions: HashMap<(Variable, String, Vec), PythonExpression>, + params: Vec, + iterations: usize, + n_cores: usize, + verbose: bool, + ) -> PyResult { + let mut fn_map = FunctionMap::new(); + + for (k, v) in &constants { + if let Ok(r) = v.expr.clone().try_into() { + fn_map.add_constant(k.expr.as_view(), r); + } else { + Err(exceptions::PyValueError::new_err(format!( + "Constants must be rationals. If this is not possible, pass the value as a parameter", + )))? + } + } + + for ((symbol, rename, args), body) in &functions { + let symbol = symbol + .to_id() + .ok_or(exceptions::PyValueError::new_err(format!( + "Bad function name {}", + symbol + )))?; + let args: Vec<_> = args + .iter() + .map(|x| { + x.to_id().ok_or(exceptions::PyValueError::new_err(format!( + "Bad function name {}", + symbol + ))) + }) + .collect::>()?; + + fn_map + .add_function(symbol, rename.clone(), args, body.expr.as_view()) + .map_err(|e| { + exceptions::PyValueError::new_err(format!("Could not add function: {}", e)) + })?; + } + + let settings = OptimizationSettings { + horner_iterations: iterations, + n_cores, + verbose, + ..OptimizationSettings::default() + }; + + let params: Vec<_> = params.iter().map(|x| x.expr.clone()).collect(); + + let exprs = exprs.iter().map(|x| x.expr.as_view()).collect::>(); + + let eval = Atom::evaluator_multiple(&exprs, &fn_map, ¶ms, settings).map_err(|e| { + exceptions::PyValueError::new_err(format!("Could not create evaluator: {}", e)) + })?; + + let eval_f64 = eval.map_coeff(&|x| x.to_f64()); + + Ok(PythonExpressionEvaluator { eval: eval_f64 }) + } } /// A raplacement, which is a pattern and a right-hand side, with optional conditions and settings. @@ -4160,13 +4333,7 @@ impl PythonMatchIterator { i.next().map(|m| { m.match_stack .into_iter() - .map(|m| { - (Atom::new_var(m.0).into(), { - let mut a = Atom::default(); - m.1.to_atom(&mut a); - a.into() - }) - }) + .map(|m| (Atom::new_var(m.0).into(), { m.1.to_atom().into() })) .collect() }) }) @@ -4762,41 +4929,6 @@ impl PythonPolynomial { } } - /// Optimize the polynomial for evaluation using `iterations` number of iterations. - /// The optimized output can be exported in a C++ format using `to_file`. - /// - /// Returns an evaluator for the polynomial. - #[pyo3(signature = (iterations = 1000, to_file = None))] - pub fn optimize( - &self, - iterations: usize, - to_file: Option, - ) -> PyResult { - let o = self.poly.optimize(iterations); - if let Some(file) = to_file.as_ref() { - std::fs::write( - file, - format!( - "{}", - InstructionSetPrinter { - name: "evaluate".to_string(), - instr: &o, - mode: InstructionSetMode::CPP(InstructionSetModeCPPSettings { - write_header_and_test: true, - always_pass_output_array: false, - }) - } - ), - ) - .unwrap(); - } - - let o_f64 = o.convert::(); - Ok(PythonInstructionEvaluator { - instr: o_f64.evaluator(), - }) - } - /// Compute the Groebner basis of a polynomial system. /// /// If `grevlex=True`, reverse graded lexicographical ordering is used, @@ -8272,23 +8404,211 @@ impl ConvertibleToRationalPolynomial { } } +/// An optimized evaluator for expressions. #[pyclass(name = "Evaluator", module = "symbolica")] #[derive(Clone)] -pub struct PythonInstructionEvaluator { - pub instr: InstructionEvaluator, +pub struct PythonExpressionEvaluator { + pub eval: ExpressionEvaluator, +} + +/// A compiled and optimized evaluator for expressions. +#[pyclass(name = "CompiledEvaluator", module = "symbolica")] +#[derive(Clone)] +pub struct PythonCompiledExpressionEvaluator { + pub eval: CompiledEvaluator, + pub output_len: usize, +} + +#[pymethods] +impl PythonCompiledExpressionEvaluator { + /// Load a compiled library, previously generated with `compile`. + #[classmethod] + fn load( + _cls: &PyType, + filename: &str, + function_name: &str, + output_len: usize, + ) -> PyResult { + Ok(Self { + eval: CompiledEvaluator::load(filename, function_name) + .map_err(|e| exceptions::PyValueError::new_err(format!("Load error: {}", e)))?, + output_len, + }) + } + + /// Evaluate the expression for multiple inputs and return the result. + fn evaluate_single(&mut self, inputs: Vec>) -> Vec { + let mut res = vec![0.; self.output_len]; + inputs + .iter() + .map(|s| { + self.eval.evaluate(s, &mut res); + res[0] + }) + .collect() + } + + /// Evaluate the expression for multiple inputs and return the result. + fn evaluate_complex_single<'py>( + &mut self, + py: Python<'py>, + inputs: Vec>>, + ) -> Vec<&'py PyComplex> { + let mut res = vec![Complex::new_zero(); self.output_len]; + inputs + .iter() + .map(|s| { + self.eval.evaluate(s, &mut res); + PyComplex::from_doubles(py, res[0].re, res[0].im) + }) + .collect() + } + + /// Evaluate the expression for multiple inputs and return the results. + fn evaluate(&mut self, inputs: Vec>) -> Vec> { + inputs + .iter() + .map(|s| { + let mut v = vec![0.; self.output_len]; + self.eval.evaluate(s, &mut v); + v + }) + .collect() + } + + /// Evaluate the expression for multiple inputs and return the results. + fn evaluate_complex<'py>( + &mut self, + python: Python<'py>, + inputs: Vec>>, + ) -> Vec> { + let mut v = vec![Complex::new_zero(); self.output_len]; + inputs + .iter() + .map(|s| { + self.eval.evaluate(s, &mut v); + v.iter() + .map(|x| PyComplex::from_doubles(python, x.re, x.im)) + .collect() + }) + .collect() + } } #[pymethods] -impl PythonInstructionEvaluator { - /// Evaluate the polynomial for multiple inputs and return the result. - fn evaluate(&self, inputs: Vec>) -> Vec { - let mut eval = self.instr.clone(); +impl PythonExpressionEvaluator { + /// Evaluate the expression for multiple inputs and return the result. + fn evaluate_single(&mut self, inputs: Vec>) -> Vec { + let mut res = vec![0.]; + inputs + .iter() + .map(|s| { + self.eval.evaluate(s, &mut res); + res[0] + }) + .collect() + } + + /// Evaluate the expression for multiple inputs and return the result. + fn evaluate_complex_single<'py>( + &mut self, + py: Python<'py>, + inputs: Vec>>, + ) -> Vec<&'py PyComplex> { + // FIXME: clone every time + let mut eval = self.eval.clone().map_coeff(&|x| Complex::new(*x, 0.)); + + let mut res = vec![Complex::new_zero()]; + inputs + .iter() + .map(|s| { + eval.evaluate(s, &mut res); + PyComplex::from_doubles(py, res[0].re, res[0].im) + }) + .collect() + } + /// Evaluate the expression for multiple inputs and return the results. + fn evaluate(&mut self, inputs: Vec>) -> Vec> { inputs .iter() - .map(|s| eval.evaluate_with_input(s)[0]) + .map(|s| { + let mut v = vec![0.; self.eval.get_output_len()]; + self.eval.evaluate(s, &mut v); + v + }) .collect() } + + /// Evaluate the expression for multiple inputs and return the results. + fn evaluate_complex<'py>( + &mut self, + python: Python<'py>, + inputs: Vec>>, + ) -> Vec> { + let mut eval = self.eval.clone().map_coeff(&|x| Complex::new(*x, 0.)); + + let mut v = vec![Complex::new_zero(); self.eval.get_output_len()]; + inputs + .iter() + .map(|s| { + eval.evaluate(s, &mut v); + v.iter() + .map(|x| PyComplex::from_doubles(python, x.re, x.im)) + .collect() + }) + .collect() + } + + /// Compile the evaluator to a shared library using C++ and optionally inline assembly and load it. + #[pyo3(signature = + (function_name, + filename, + library_name, + inline_asm = true, + optimization_level = 3, + compiler_path = None, + ))] + fn compile( + &self, + function_name: &str, + filename: &str, + library_name: &str, + inline_asm: bool, + optimization_level: u8, + compiler_path: Option<&str>, + ) -> PyResult { + let mut options = CompileOptions::default(); + options.optimization_level = optimization_level as usize; + if let Some(compiler_path) = compiler_path { + options.compiler = compiler_path.to_string(); + } + + Ok(PythonCompiledExpressionEvaluator { + eval: self + .eval + .export_cpp( + filename, + function_name, + true, + if inline_asm { + InlineASM::Intel + } else { + InlineASM::None + }, + ) + .map_err(|e| exceptions::PyValueError::new_err(format!("Export error: {}", e)))? + .compile(library_name, options) + .map_err(|e| { + exceptions::PyValueError::new_err(format!("Compilation error: {}", e)) + })? + .load() + .map_err(|e| { + exceptions::PyValueError::new_err(format!("Library loading error: {}", e)) + })?, + output_len: self.eval.get_output_len(), + }) + } } #[derive(FromPyObject)] diff --git a/src/evaluate.rs b/src/evaluate.rs index 60d220a..0e4bc5c 100644 --- a/src/evaluate.rs +++ b/src/evaluate.rs @@ -14,11 +14,14 @@ use self_cell::self_cell; use crate::{ atom::{representation::InlineVar, Atom, AtomOrView, AtomView, Symbol}, coefficient::CoefficientView, + combinatorics::unique_permutations, domains::{ float::{Complex, NumericalFloatLike, Real}, + integer::Integer, rational::Rational, }, state::State, + LicenseManager, }; type EvalFnType = Box< @@ -146,6 +149,7 @@ pub struct OptimizationSettings { pub n_cores: usize, pub cpe_iterations: Option, pub hot_start: Option>>, + pub verbose: bool, } impl Default for OptimizationSettings { @@ -155,6 +159,7 @@ impl Default for OptimizationSettings { n_cores: 1, cpe_iterations: None, hot_start: None, + verbose: false, } } } @@ -188,7 +193,9 @@ impl Atom { } /// Create an efficient evaluator for a (nested) expression. - /// All variables and all user functions in the expression must occur in the map. + /// All free parameters must appear in `params` and all other variables + /// and user functions in the expression must occur in the function map. + /// The function map may have nested expressions. pub fn evaluator<'a>( &'a self, fn_map: &FunctionMap<'a, Rational>, @@ -200,6 +207,7 @@ impl Atom { optimization_settings.horner_iterations, optimization_settings.n_cores, optimization_settings.hot_start.clone(), + optimization_settings.verbose, )) } @@ -217,6 +225,7 @@ impl Atom { optimization_settings.horner_iterations, optimization_settings.n_cores, optimization_settings.hot_start.clone(), + optimization_settings.verbose, )) } } @@ -643,13 +652,13 @@ pub struct ExpressionEvaluator { } impl ExpressionEvaluator { - pub fn evaluate(&mut self, params: &[T]) -> T { + pub fn evaluate_single(&mut self, params: &[T]) -> T { let mut res = T::new_zero(); - self.evaluate_multiple(params, std::slice::from_mut(&mut res)); + self.evaluate(params, std::slice::from_mut(&mut res)); res } - pub fn evaluate_multiple(&mut self, params: &[T], out: &mut [T]) { + pub fn evaluate(&mut self, params: &[T], out: &mut [T]) { for (t, p) in self.stack.iter_mut().zip(params) { *t = p.clone(); } @@ -712,6 +721,10 @@ impl ExpressionEvaluator { } } + pub fn get_output_len(&self) -> usize { + self.result_indices.len() + } + fn remove_common_pairs(&mut self) -> usize { let mut pairs: HashMap<_, Vec> = HashMap::default(); @@ -1043,14 +1056,19 @@ impl ExpressionEvaluator { include_header: bool, inline_asm: InlineASM, ) -> Result { + let mut filename = filename.to_string(); + if !filename.ends_with(".cpp") { + filename += ".cpp"; + } + let cpp = match inline_asm { InlineASM::Intel => self.export_asm_str(function_name, include_header), InlineASM::None => self.export_cpp_str(function_name, include_header), }; - let _ = std::fs::write(filename, cpp)?; + let _ = std::fs::write(&filename, cpp)?; Ok(ExportedCode { - source_filename: filename.to_string(), + source_filename: filename, function_name: function_name.to_string(), inline_asm, }) @@ -1461,8 +1479,9 @@ impl ExpressionEvaluator { MemOrReg::Reg(out_reg) => { if let Some(j) = a.iter().find(|x| **x == MemOrReg::Reg(*out_reg)) { // we can recycle the register completely + let mut first_skipped = false; for i in a { - if i != j { + if first_skipped || i != j { match i { MemOrReg::Reg(k) => { *out += &format!( @@ -1480,14 +1499,16 @@ impl ExpressionEvaluator { } } } + first_skipped |= i == j; } } else if let Some(MemOrReg::Reg(j)) = a.iter().find(|x| matches!(x, MemOrReg::Reg(_))) { *out += &format!("\t\t\"movapd xmm{}, xmm{}\\n\\t\"\n", out_reg, j); + let mut first_skipped = false; for i in a { - if *i != MemOrReg::Reg(*j) { + if first_skipped || *i != MemOrReg::Reg(*j) { match i { MemOrReg::Reg(k) => { *out += &format!( @@ -1505,6 +1526,7 @@ impl ExpressionEvaluator { } } } + first_skipped |= *i == MemOrReg::Reg(*j); } } else { if let MemOrReg::Mem(k) = &a[0] { @@ -1538,8 +1560,9 @@ impl ExpressionEvaluator { *out += &format!("\t\t\"movapd xmm{}, xmm{}\\n\\t\"\n", out_reg, j); + let mut first_skipped = false; for i in a { - if *i != MemOrReg::Reg(*j) { + if first_skipped || *i != MemOrReg::Reg(*j) { match i { MemOrReg::Reg(k) => { *out += &format!( @@ -1557,6 +1580,8 @@ impl ExpressionEvaluator { } } } + + first_skipped |= *i == MemOrReg::Reg(*j); } } else { if let MemOrReg::Mem(k) = &a[0] { @@ -2299,8 +2324,9 @@ impl EvalTree { iterations: usize, n_cores: usize, start_scheme: Option>>, + verbose: bool, ) -> ExpressionEvaluator { - let _ = self.optimize_horner_scheme(iterations, n_cores, start_scheme); + let _ = self.optimize_horner_scheme(iterations, n_cores, start_scheme, verbose); self.common_subexpression_elimination(); self.clone().linearize(None) } @@ -2335,6 +2361,7 @@ impl EvalTree { iterations: usize, n_cores: usize, start_scheme: Option>>, + verbose: bool, ) -> Vec> { let v = match start_scheme { Some(a) => a, @@ -2363,6 +2390,7 @@ impl EvalTree { &v, iterations, n_cores, + verbose, ); for e in &mut self.expressions.tree { e.apply_horner_scheme(&scheme); @@ -2384,8 +2412,9 @@ impl EvalTree { v.sort_by_key(|k| std::cmp::Reverse(k.1)); let v = v.into_iter().map(|(k, _)| k).collect::>(); - let scheme = - Expression::optimize_horner_scheme_multiple(&e.tree, &v, iterations, n_cores); + let scheme = Expression::optimize_horner_scheme_multiple( + &e.tree, &v, iterations, n_cores, verbose, + ); for t in &mut e.tree { t.apply_horner_scheme(&scheme); @@ -2660,8 +2689,15 @@ impl Expression { vars: &[Self], iterations: usize, n_cores: usize, + verbose: bool, ) -> Vec { - Self::optimize_horner_scheme_multiple(std::slice::from_ref(self), vars, iterations, n_cores) + Self::optimize_horner_scheme_multiple( + std::slice::from_ref(self), + vars, + iterations, + n_cores, + verbose, + ) } pub fn optimize_horner_scheme_multiple( @@ -2669,6 +2705,7 @@ impl Expression { vars: &[Self], iterations: usize, n_cores: usize, + verbose: bool, ) -> Vec { if vars.len() == 0 { return vars.to_vec(); @@ -2689,33 +2726,65 @@ impl Expression { best_ops = (best_ops.0 + ops.0, best_ops.1 + ops.1); } - println!("init {:?} {:?} {}", best_ops, vars, vars.len()); + if verbose { + println!( + "Initial ops: {} additions and {} multiplications", + best_ops.0, best_ops.1 + ); + } let best_mul = Arc::new(AtomicUsize::new(best_ops.1)); let best_add = Arc::new(AtomicUsize::new(best_ops.0)); let best_scheme = Arc::new(Mutex::new(vars.to_vec())); + let permutations = + if vars.len() < 10 && Integer::factorial(vars.len() as u32) <= iterations.max(1) { + let v: Vec<_> = (0..vars.len()).collect(); + Some(unique_permutations(&v).1) + } else { + None + }; + let p_ref = &permutations; + + let n_cores = if LicenseManager::is_licensed() { + n_cores + } else { + 1 + }; + std::thread::scope(|s| { - for _ in 0..n_cores { - let mut vars = vars.to_vec(); + for i in 0..n_cores { + let mut cvars = vars.to_vec(); let best_scheme = best_scheme.clone(); let best_mul = best_mul.clone(); let best_add = best_add.clone(); s.spawn(move || { let mut r = thread_rng(); - for _ in 0..iterations / n_cores { + for j in 0..iterations / n_cores { // try a random swap - let t1 = r.gen_range(0..vars.len()); - let t2 = r.gen_range(0..vars.len()); + let mut t1 = 0; + let mut t2 = 0; + + if let Some(p) = p_ref { + if j >= p.len() / n_cores { + break; + } + + let perm = &p[i * (p.len() / n_cores) + j]; + cvars = perm.iter().map(|x| vars[*x].clone()).collect(); + } else { + t1 = r.gen_range(0..cvars.len()); + t2 = r.gen_range(0..cvars.len() - 1); - vars.swap(t1, t2); + cvars.swap(t1, t2); + } let horner: Vec<_> = expressions .iter() .map(|x| { let mut h = x.clone(); - h.apply_horner_scheme(&vars); + h.apply_horner_scheme(&cvars); h.to_hashed_expression().1 }) .collect(); @@ -2732,13 +2801,19 @@ impl Expression { || cur_ops.1 == best_mul.load(Ordering::Relaxed) && cur_ops.0 <= best_add.load(Ordering::Relaxed) { - println!("new best {:?}", cur_ops); + if verbose { + println!( + "Accept move at core iteration {}/{}: {} additions and {} multiplications", + j, iterations / n_cores, + cur_ops.0, cur_ops.1 + ); + } best_mul.store(cur_ops.1, Ordering::Relaxed); best_add.store(cur_ops.0, Ordering::Relaxed); - best_scheme.lock().unwrap().clone_from_slice(&vars); + best_scheme.lock().unwrap().clone_from_slice(&cvars); } else { - vars.swap(t1, t2); + cvars.swap(t1, t2); } } }); @@ -3250,19 +3325,19 @@ impl Clone for CompiledEvaluator { /// A floating point type that can be used for compiled evaluation. pub trait CompiledEvaluatorFloat: Sized { - fn evaluate(eval: &CompiledEvaluator, args: &[Self], out: &mut [Self]); + fn evaluate(eval: &mut CompiledEvaluator, args: &[Self], out: &mut [Self]); } impl CompiledEvaluatorFloat for f64 { #[inline(always)] - fn evaluate(eval: &CompiledEvaluator, args: &[Self], out: &mut [Self]) { + fn evaluate(eval: &mut CompiledEvaluator, args: &[Self], out: &mut [Self]) { eval.evaluate_double(args, out); } } impl CompiledEvaluatorFloat for Complex { #[inline(always)] - fn evaluate(eval: &CompiledEvaluator, args: &[Self], out: &mut [Self]) { + fn evaluate(eval: &mut CompiledEvaluator, args: &[Self], out: &mut [Self]) { eval.evaluate_complex(args, out); } } @@ -3356,13 +3431,13 @@ impl CompiledEvaluator { /// Evaluate the compiled code. #[inline(always)] - pub fn evaluate(&self, args: &[T], out: &mut [T]) { + pub fn evaluate(&mut self, args: &[T], out: &mut [T]) { T::evaluate(self, args, out); } /// Evaluate the compiled code with double-precision floating point numbers. #[inline(always)] - pub fn evaluate_double(&self, args: &[f64], out: &mut [f64]) { + pub fn evaluate_double(&mut self, args: &[f64], out: &mut [f64]) { unsafe { (self.library.borrow_dependent().eval_double)( args.as_ptr(), @@ -3374,7 +3449,7 @@ impl CompiledEvaluator { /// Evaluate the compiled code with complex numbers. #[inline(always)] - pub fn evaluate_complex(&self, args: &[Complex], out: &mut [Complex]) { + pub fn evaluate_complex(&mut self, args: &[Complex], out: &mut [Complex]) { unsafe { (self.library.borrow_dependent().eval_complex)( args.as_ptr(), diff --git a/src/id.rs b/src/id.rs index 919c359..df6347f 100644 --- a/src/id.rs +++ b/src/id.rs @@ -821,7 +821,7 @@ impl Pattern { match self { Pattern::Wildcard(name) => { if let Some(w) = match_stack.get(*name) { - w.to_atom(out); + w.to_atom_into(out); } else if match_stack.settings.allow_new_wildcards_on_rhs { out.to_var(*name); } else { @@ -863,7 +863,7 @@ impl Pattern { } _ => { let mut handle = workspace.new_atom(); - w.to_atom(&mut handle); + w.to_atom_into(&mut handle); func.add_arg(handle.as_view()) } }, @@ -902,7 +902,7 @@ impl Pattern { Match::Single(s) => out.set_from_view(s), Match::Multiple(_, _) => { let mut handle = workspace.new_atom(); - w.to_atom(&mut handle); + w.to_atom_into(&mut handle); out.set_from_view(&handle.as_view()) } Match::FunctionName(_) => { @@ -947,7 +947,7 @@ impl Pattern { } _ => { let mut handle = workspace.new_atom(); - w.to_atom(&mut handle); + w.to_atom_into(&mut handle); mul.extend(handle.as_view()) } }, @@ -990,7 +990,7 @@ impl Pattern { } _ => { let mut handle = workspace.new_atom(); - w.to_atom(&mut handle); + w.to_atom_into(&mut handle); add.extend(handle.as_view()) } }, @@ -1488,7 +1488,15 @@ impl<'a> std::fmt::Debug for Match<'a> { impl<'a> Match<'a> { /// Create a new atom from a matched subexpression. /// Arguments lists are wrapped in the function `arg`. - pub fn to_atom(&self, out: &mut Atom) { + pub fn to_atom(&self) -> Atom { + let mut out = Atom::default(); + self.to_atom_into(&mut out); + out + } + + /// Create a new atom from a matched subexpression. + /// Arguments lists are wrapped in the function `arg`. + pub fn to_atom_into(&self, out: &mut Atom) { match self { Self::Single(v) => { out.set_from_view(v); diff --git a/symbolica.pyi b/symbolica.pyi index 7a239a8..987a20e 100644 --- a/symbolica.pyi +++ b/symbolica.pyi @@ -1079,6 +1079,62 @@ class Expression: >>> print(e.evaluate_complex({x: 1 + 2j, y: 4 + 3j}, {})) """ + def evaluator( + self, + constants: dict[Expression, Expression], + funs: dict[Tuple[Expression, str, Sequence[Expression]], Expression], + params: Sequence[Expression], + iterations: int = 100, + n_cores: int = 4, + verbose: bool = False, + ) -> Evaluator: + """Create an evaluator that can evaluate (nested) expressions in an optimized fashion. + All constants and functions should be provided as dictionaries, where the function + dictionary has a key `(name, printable name, arguments)` and the value is the function + body. For example the function `f(x,y)=x^2+y` should be provided as + `{(f, "f", (x, y)): x**2 + y}`. All free parameters should be provided in the `params` list. + + Examples + -------- + >>> from symbolica import * + >>> x, y, z, pi, f, g = Expression.symbols( + >>> 'x', 'y', 'z', 'pi', 'f', 'g') + >>> + >>> e1 = Expression.parse("x + pi + cos(x) + f(g(x+1),x*2)") + >>> fd = Expression.parse("y^2 + z^2*y^2") + >>> gd = Expression.parse("y + 5") + >>> + >>> ev = e1.evaluator({pi: Expression.num(22)/7}, + >>> {(f, "f", (y, z)): fd, (g, "g", (y, )): gd}, [x]) + >>> res = ev.evaluate([[1.], [2.], [3.]]) # evaluate at x=1, x=2, x=3 + >>> print(res) + """ + + @classmethod + def evaluator_multiple( + _cls, + exprs: Sequence[Expression], + constants: dict[Expression, Expression], + funs: dict[Tuple[Expression, str, Sequence[Expression]], Expression], + params: Sequence[Expression], + iterations: int = 100, + n_cores: int = 4, + verbose: bool = False, + ) -> Evaluator: + """Create an evaluator that can jointly evaluate (nested) expressions in an optimized fashion. + See `Expression.evaluator()` for more information. + + Examples + -------- + >>> from symbolica import * + >>> x = Expression.symbols('x') + >>> e1 = Expression.parse("x^2 + 1") + >>> e2 = Expression.parse("x^2 + 2) + >>> ev = Expression.evaluator_multiple([e1, e2], {}, {}, [x]) + + will recycle the `x^2` + """ + class Replacement: """A replacement of a pattern by a right-hand side.""" @@ -1810,14 +1866,6 @@ class Polynomial: def to_finite_field(self, prime: int) -> FiniteFieldPolynomial: """Convert the coefficients of the polynomial to a finite field with prime `prime`.""" - def optimize(self, iterations: int = 1000, to_file: str | None = None) -> Evaluator: - """ - Optimize the polynomial for evaluation using `iterations` number of iterations. - The optimized output can be exported in a C++ format using `to_file`. - - Returns an evaluator for the polynomial. - """ - def factor_square_free(self) -> list[Tuple[Polynomial, int]]: """Compute the square-free factorization of the polynomial. @@ -2790,8 +2838,58 @@ class Matrix: class Evaluator: """An optimized evaluator of an expression.""" - def evaluate(self, inputs: Sequence[Sequence[float]]) -> List[float]: - """Evaluate the polynomial for multiple inputs and return the result.""" + def compile( + self, + function_name: str, + filename: str, + library_name: str, + inline_asm: bool = True, + optimization_level: int = 3, + compiler_path: Optional[str] = None, + ) -> CompiledEvaluator: + """Compile the evaluator to a shared library using C++ and optionally inline assembly and load it.""" + + def evaluate(self, inputs: Sequence[Sequence[float]]) -> List[List[float]]: + """Evaluate the expression for multiple inputs and return the result.""" + + def evaluate_single(self, inputs: Sequence[Sequence[float]]) -> List[float]: + """Evaluate the expression for multiple inputs and return the result, which + is a single value.""" + + def evaluate_complex(self, inputs: Sequence[Sequence[complex]]) -> List[List[complex]]: + """Evaluate the expression for multiple inputs and return the result.""" + + def evaluate_complex_single(self, inputs: Sequence[Sequence[complex]]) -> List[complex]: + """Evaluate the expression for multiple inputs and return the result, which + is a single value.""" + + +class CompiledEvaluator: + """An compiled evaluator of an expression. This will give the highest performance of + all evaluators.""" + + @classmethod + def load( + _cls, + filename: str, + function_name: str, + output_len: int, + ) -> CompiledEvaluator: + """Load a compiled library, previously generated with `Evaluator.compile()`.""" + + def evaluate(self, inputs: Sequence[Sequence[float]]) -> List[List[float]]: + """Evaluate the expression for multiple inputs and return the result.""" + + def evaluate_single(self, inputs: Sequence[Sequence[float]]) -> List[float]: + """Evaluate the expression for multiple inputs and return the result, which + is a single value.""" + + def evaluate_complex(self, inputs: Sequence[Sequence[complex]]) -> List[List[complex]]: + """Evaluate the expression for multiple inputs and return the result.""" + + def evaluate_complex_single(self, inputs: Sequence[Sequence[complex]]) -> List[complex]: + """Evaluate the expression for multiple inputs and return the result, which + is a single value.""" class NumericalIntegrator: