implement a fast atan approximation function (#1583)

implement a fast atan function using an approximate function for x in [-1, 1], for |x| > 1, the identity arctan(x) = sign(pi/2, x) - arctan(1/x) (Thanks to Eric) is used. Two versions of the approximate functions are included. 1. Efficient Approximations for the Arctangent Function by Rajan 2006: Equation 9: Max error ~0.0015 rad 2. https://stackoverflow.com/questions/42537957/fast-accurate-atan-arctan-approximation-algorithm: Max error ~0.00063 rad Generally I find the form from stackoverflow is more accurate and also faster. 1) is also discussed in 2). In general, from testing, atanf is roughly more than 2 times faster than std::atan.
AMReX-Astro · Jun 22, 2024 · 134200e · 134200e
1 parent 7d846d0
commit 134200e
Show file tree

Hide file tree

Showing 11 changed files with 686 additions and 403 deletions.
diff --git a/Make.Microphysics_extern b/Make.Microphysics_extern
@@ -140,6 +140,7 @@ ifeq ($(USE_NSE_NET), TRUE)
      EXTERN_CORE += $(MICROPHYSICS_HOME)/nse_solver
      EXTERN_CORE += $(MICROPHYSICS_HOME)/util/hybrj
 endif
+EXTERN_CORE += $(MICROPHYSICS_HOME)/util/approx_math
 
 # we can't have both NSE solvers
 ifeq ($(USE_NSE_TABLE), TRUE)

diff --git a/screening/screen.H b/screening/screen.H
@@ -19,6 +19,7 @@
 #include <cmath>
 #include <screen_data.H>
 #include <extern_parameters.H>
+#include <approx_math.H>
 
 using namespace amrex::literals;
 
@@ -660,7 +661,7 @@ void chugunov2009_f0 (const amrex::Real gamma, const amrex::Real dlog_dT, amrex:
         dterm2_dgamma = 1.0_rt / (2.0_rt * term1);
     }
 
-    term3 = gamma_12 - std::atan(gamma_12);
+    term3 = gamma_12 - fast_atan(gamma_12);
     if constexpr (do_T_derivatives) {
         dterm3_dgamma = 0.5_rt * gamma_12 / (1.0_rt + gamma);
     }
@@ -817,7 +818,7 @@ void chabrier1998_helmholtz_F(const amrex::Real gamma, const amrex::Real dgamma_
 
     f = A_1 * (sqrt_gamma_A2_gamma -
                A_2 * std::log(sqrt_gamma_A2 + sqrt_1_gamma_A2)) +
-               2.0_rt * A_3 * (sqrt_gamma - std::atan(sqrt_gamma));
+               2.0_rt * A_3 * (sqrt_gamma - fast_atan(sqrt_gamma));
 
     if constexpr (do_T_derivatives) {
         df_dT = A_1 * ((A_2 + 2.0_rt * gamma) / (2.0_rt * sqrt_gamma_A2_gamma) -

diff --git a/unit_test/test_ase/ci-benchmarks/ase_nse_net_unit_test.out b/unit_test/test_ase/ci-benchmarks/ase_nse_net_unit_test.out
@@ -1,4 +1,5 @@
-AMReX (23.05-4-ga393d7ff7e32) initialized
+Initializing AMReX (23.07-395-ge6c93bf22695)...
+AMReX (23.07-395-ge6c93bf22695) initialized
 starting the single zone burn...
 reading in network electron-capture / beta-decay tables...
 chemical potential of proton is -3
@@ -7,28 +8,28 @@ State Density (g/cm^3): 10000000
 State Temperature (K): 6000000000
 electron fraction is 0.5
 NSE state: 
-n : 0.0006075658986
-H1 : 0.001992231785
-He4 : 0.5191546413
-C12 : 1.326178988e-05
-N13 : 9.476614056e-11
-N14 : 2.565265841e-09
-O16 : 3.224715368e-05
-F18 : 5.420149548e-11
-Ne20 : 7.424231652e-07
-Ne21 : 4.996678197e-08
-Na22 : 2.606751428e-09
-Na23 : 1.008351077e-06
-Mg24 : 0.0001021744034
-Al27 : 0.000554657875
-Si28 : 0.03679926049
-P31 : 0.04228638044
-S32 : 0.03865876461
-Ar36 : 0.02465328319
-Ca40 : 0.02885085912
-Ti44 : 0.001660132226
-Cr48 : 0.009959852831
-Fe52 : 0.05964589202
-Ni56 : 0.2350269888
+n : 0.0006075693495
+H1 : 0.001992432534
+He4 : 0.519007877
+C12 : 1.326102e-05
+N13 : 9.477376691e-11
+N14 : 2.565486854e-09
+O16 : 3.225455246e-05
+F18 : 5.422064179e-11
+Ne20 : 7.427509432e-07
+Ne21 : 4.998912611e-08
+Na22 : 2.608008186e-09
+Na23 : 1.00884295e-06
+Mg24 : 0.0001022222509
+Al27 : 0.0005548826609
+Si28 : 0.03680966846
+P31 : 0.04229223791
+S32 : 0.03865771985
+Ar36 : 0.02464742443
+Ca40 : 0.02885152622
+Ti44 : 0.00166123105
+Cr48 : 0.009967017598
+Fe52 : 0.05968005236
+Ni56 : 0.2351208159
 We're in NSE. 
-AMReX (23.05-4-ga393d7ff7e32) finalized
+AMReX (23.07-395-ge6c93bf22695) finalized
diff --git a/unit_test/test_ase/make_table/ci-benchmarks/ase_nse_net_make_table_unit_test.out b/unit_test/test_ase/make_table/ci-benchmarks/ase_nse_net_make_table_unit_test.out
diff --git a/unit_test/test_nse/ci-benchmarks/aprox21_ci.out b/unit_test/test_nse/ci-benchmarks/aprox21_ci.out
@@ -1,7 +1,7 @@
-Initializing AMReX (23.12-21-gef38229189e3)...
-AMReX (23.12-21-gef38229189e3) initialized
+Initializing AMReX (23.07-395-ge6c93bf22695)...
+AMReX (23.07-395-ge6c93bf22695) initialized
 starting the single zone burn...
-0x7ffcb5676e40
+0x7ffee3940620
 State Density (g/cm^3): 277355338.4
 State Temperature (K): 5197769252
 Mass Fraction (H1): 0.4
@@ -27,28 +27,28 @@ Mass Fraction (n): 0.01111111111
 Mass Fraction (p): 0.01111111111
 electron fraction is 0.5
 After solving: 
-chemical potential of proton is -5.582021138
-chemical potential of neutron is -12.02471977
+chemical potential of proton is -5.581982204
+chemical potential of neutron is -12.02475811
 NSE state: 
 H1 : 0
-He3 : 6.728066857e-12
-He4 : 0.002681009009
-C12 : 2.088071581e-08
-N14 : 3.632384684e-13
-O16 : 8.47594681e-08
-Ne20 : 1.345962309e-09
-Mg24 : 5.594617425e-07
-Si28 : 0.001008098348
-S32 : 0.002046870355
-Ar36 : 0.002092144335
-Ca40 : 0.005240157608
-Ti44 : 0.0001839581247
-Cr48 : 0.002037991617
-Cr56 : 7.751434856e-22
-Fe52 : 0.03983919827
-Fe54 : 0.04497244709
-Fe56 : 1.686482626e-05
-Ni56 : 0.8982137413
-n : 9.442515318e-10
-p : 0.001666851761
-AMReX (23.12-21-gef38229189e3) finalized
+He3 : 6.726596474e-12
+He4 : 0.002680193707
+C12 : 2.088656806e-08
+N14 : 3.63235315e-13
+O16 : 8.472971401e-08
+Ne20 : 1.346294741e-09
+Mg24 : 5.59648245e-07
+Si28 : 0.001008039758
+S32 : 0.002046314586
+Ar36 : 0.002091462927
+Ca40 : 0.005238622254
+Ti44 : 0.0001839172999
+Cr48 : 0.002037708693
+Cr56 : 7.745054363e-22
+Fe52 : 0.03983694038
+Fe54 : 0.04496220187
+Fe56 : 1.685809858e-05
+Ni56 : 0.898230601
+n : 9.44170726e-10
+p : 0.001666471828
+AMReX (23.07-395-ge6c93bf22695) finalized
diff --git a/util/approx_math/Make.package b/util/approx_math/Make.package
@@ -0,0 +1 @@
+CEXE_headers += approx_math.H
diff --git a/util/approx_math/approx_math.H b/util/approx_math/approx_math.H
@@ -0,0 +1,84 @@
+#ifndef APPROX_MATH_H
+#define APPROX_MATH_H
+
+#include <AMReX_REAL.H>
+#include <microphysics_math.H>
+
+using namespace amrex::literals;
+
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+amrex::Real fast_atan_1(const amrex::Real x) {
+    ///
+    /// This calculates atan within [-1, 1] range.
+    ///
+
+    ///
+    /// Approximation of atan for x in [-1, 1]
+    /// Max absolute error for this is ~0.0015 rad
+    /// See Ref:
+    /// Efficient Approximations for the Arctangent Function by Rajan 2006
+    ///
+
+    // constexpr amrex::Real PI_4 = GCEM_PI / 4.0_rt;
+
+    // return PI_4*x - x*(std::abs(x) - 1.0_rt) *
+    //     (0.2447_rt + 0.0663_rt*std::abs(x));
+
+
+    ///
+    /// Another approximation of atan for x in [-1, 1]
+    /// Max absolute error for this is ~0.00063 rad
+    /// See Ref:
+    /// https://stackoverflow.com/questions/42537957/fast-accurate-atan-arctan-approximation-algorithm
+    ///
+
+    constexpr amrex::Real A = 0.0776509570923569_rt;
+    constexpr amrex::Real B = -0.287434475393028_rt;
+    constexpr amrex::Real C = GCEM_PI / 4.0_rt - A - B;
+
+    amrex::Real x2 = x*x;
+    return ((A*x2 + B)*x2 + C)*x;
+}
+
+
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+amrex::Real fast_atan(const amrex::Real x) {
+    ///
+    /// Fast atan approximation calculations.
+    ///
+
+    constexpr amrex::Real PI_2 = 0.5_rt * GCEM_PI;
+
+    ///
+    /// If x < 0.113, then using arctan(x) ~ x
+    /// gives better answer than the approximation below.
+    /// And accuracy increase as x << 0.113.
+    /// Also significantly faster.
+    ///
+
+    if (std::abs(x) < 0.113_rt) {
+        return x;
+    }
+
+    // Check for large number, close to infinity.
+    // Error is ~1e-8 rad by not checking actual inf
+
+    if (x > 1.e8_rt) {
+        return PI_2;
+    }
+    if (x < -1.e8_rt) {
+        return -PI_2;
+    }
+
+    // Now calculate Atan(x) using approximations
+
+    if (x > 1.0_rt) {
+        return PI_2 - fast_atan_1(1.0_rt / x);
+    }
+    if (x < -1.0_rt) {
+        return -PI_2 - fast_atan_1(1.0_rt / x);
+    }
+    return fast_atan_1(x);
+}
+
+#endif
diff --git a/util/approx_math/test_fast_atan/GNUmakefile b/util/approx_math/test_fast_atan/GNUmakefile
@@ -0,0 +1,39 @@
+PRECISION  = DOUBLE
+PROFILE    = FALSE
+
+DEBUG      = FALSE
+
+DIM        = 3
+
+COMP	   = gnu
+
+USE_MPI    = FALSE
+USE_OMP    = FALSE
+
+USE_REACT = TRUE
+
+EBASE = main
+
+# define the location of the Microphysics top directory
+MICROPHYSICS_HOME  ?= ../../..
+
+# This sets the EOS directory
+EOS_DIR     := helmholtz
+
+# This sets the network directory
+NETWORK_DIR := aprox21
+
+CONDUCTIVITY_DIR := stellar
+
+INTEGRATOR_DIR =  VODE
+
+ifeq ($(USE_CUDA), TRUE)
+  INTEGRATOR_DIR := VODE
+endif
+
+EXTERN_SEARCH += .
+
+Bpack   := ../Make.package ./Make.package
+Blocs   := ../ .
+
+include $(MICROPHYSICS_HOME)/unit_test/Make.unit_test
diff --git a/util/approx_math/test_fast_atan/Make.package b/util/approx_math/test_fast_atan/Make.package
@@ -0,0 +1 @@
+CEXE_sources += main.cpp
diff --git a/util/approx_math/test_fast_atan/main.cpp b/util/approx_math/test_fast_atan/main.cpp
@@ -0,0 +1,63 @@
+#include <test_fast_atan.H>
+
+int main() {
+
+    // Accuracy tests
+
+    // Test values including edge cases and some typical values
+
+    test_fast_atan_accuracy(0.0_rt);
+    test_fast_atan_accuracy(0.01_rt);
+    test_fast_atan_accuracy(0.08_rt);
+    test_fast_atan_accuracy(0.1_rt);
+    test_fast_atan_accuracy(0.5_rt);
+    test_fast_atan_accuracy(1.0_rt);
+    test_fast_atan_accuracy(5.0_rt);
+    test_fast_atan_accuracy(8.0_rt);
+    test_fast_atan_accuracy(10.0_rt);
+    test_fast_atan_accuracy(25.0_rt);
+    test_fast_atan_accuracy(100.0_rt);
+    test_fast_atan_accuracy(500.0_rt);
+    test_fast_atan_accuracy(5.0e8_rt);
+
+    test_fast_atan_accuracy(-0.01_rt);
+    test_fast_atan_accuracy(-0.08_rt);
+    test_fast_atan_accuracy(-0.1_rt);
+    test_fast_atan_accuracy(-0.5_rt);
+    test_fast_atan_accuracy(-1.0_rt);
+    test_fast_atan_accuracy(-5.0_rt);
+    test_fast_atan_accuracy(-8.0_rt);
+    test_fast_atan_accuracy(-10.0_rt);
+    test_fast_atan_accuracy(-25.0_rt);
+    test_fast_atan_accuracy(-100.0_rt);
+    test_fast_atan_accuracy(-500.0_rt);
+    test_fast_atan_accuracy(-5.0e8_rt);
+
+    // Inf Case
+
+    test_fast_atan_accuracy(std::numeric_limits<amrex::Real>::infinity());
+    test_fast_atan_accuracy(-std::numeric_limits<amrex::Real>::infinity());
+
+    std::cout << "Accuracy tests passed!" << std::endl;
+
+    // Now performance test
+
+    int iters = 5;
+    amrex::Real test_value = 160.0_rt;
+    test_fast_atan_speed(100, iters, test_value);
+
+    iters = 10;
+    test_fast_atan_speed(100, iters, test_value);
+
+    iters = 20;
+    test_fast_atan_speed(100, iters, test_value);
+
+    iters = 30;
+    test_fast_atan_speed(100, iters, test_value);
+
+    iters = 50;
+    test_fast_atan_speed(100, iters, test_value);
+
+    iters = 70;
+    test_fast_atan_speed(100, iters, test_value);
+}