Skip to content

Commit

Permalink
implement a fast atan approximation function (#1583)
Browse files Browse the repository at this point in the history
implement a fast atan function using an approximate function for x in [-1, 1], for |x| > 1, the identity arctan(x) = sign(pi/2, x) - arctan(1/x) (Thanks to Eric) is used.

Two versions of the approximate functions are included.

1. Efficient Approximations for the Arctangent Function by Rajan 2006: Equation 9: Max error ~0.0015 rad

2. https://stackoverflow.com/questions/42537957/fast-accurate-atan-arctan-approximation-algorithm: Max error ~0.00063 rad

Generally I find the form from stackoverflow is more accurate and also faster. 1) is also discussed in 2).

In general, from testing, atanf is roughly more than 2 times faster than std::atan.
  • Loading branch information
zhichen3 authored Jun 22, 2024
1 parent 7d846d0 commit 134200e
Show file tree
Hide file tree
Showing 11 changed files with 686 additions and 403 deletions.
1 change: 1 addition & 0 deletions Make.Microphysics_extern
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ ifeq ($(USE_NSE_NET), TRUE)
EXTERN_CORE += $(MICROPHYSICS_HOME)/nse_solver
EXTERN_CORE += $(MICROPHYSICS_HOME)/util/hybrj
endif
EXTERN_CORE += $(MICROPHYSICS_HOME)/util/approx_math

# we can't have both NSE solvers
ifeq ($(USE_NSE_TABLE), TRUE)
Expand Down
5 changes: 3 additions & 2 deletions screening/screen.H
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cmath>
#include <screen_data.H>
#include <extern_parameters.H>
#include <approx_math.H>

using namespace amrex::literals;

Expand Down Expand Up @@ -660,7 +661,7 @@ void chugunov2009_f0 (const amrex::Real gamma, const amrex::Real dlog_dT, amrex:
dterm2_dgamma = 1.0_rt / (2.0_rt * term1);
}

term3 = gamma_12 - std::atan(gamma_12);
term3 = gamma_12 - fast_atan(gamma_12);
if constexpr (do_T_derivatives) {
dterm3_dgamma = 0.5_rt * gamma_12 / (1.0_rt + gamma);
}
Expand Down Expand Up @@ -817,7 +818,7 @@ void chabrier1998_helmholtz_F(const amrex::Real gamma, const amrex::Real dgamma_

f = A_1 * (sqrt_gamma_A2_gamma -
A_2 * std::log(sqrt_gamma_A2 + sqrt_1_gamma_A2)) +
2.0_rt * A_3 * (sqrt_gamma - std::atan(sqrt_gamma));
2.0_rt * A_3 * (sqrt_gamma - fast_atan(sqrt_gamma));

if constexpr (do_T_derivatives) {
df_dT = A_1 * ((A_2 + 2.0_rt * gamma) / (2.0_rt * sqrt_gamma_A2_gamma) -
Expand Down
51 changes: 26 additions & 25 deletions unit_test/test_ase/ci-benchmarks/ase_nse_net_unit_test.out
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
AMReX (23.05-4-ga393d7ff7e32) initialized
Initializing AMReX (23.07-395-ge6c93bf22695)...
AMReX (23.07-395-ge6c93bf22695) initialized
starting the single zone burn...
reading in network electron-capture / beta-decay tables...
chemical potential of proton is -3
Expand All @@ -7,28 +8,28 @@ State Density (g/cm^3): 10000000
State Temperature (K): 6000000000
electron fraction is 0.5
NSE state:
n : 0.0006075658986
H1 : 0.001992231785
He4 : 0.5191546413
C12 : 1.326178988e-05
N13 : 9.476614056e-11
N14 : 2.565265841e-09
O16 : 3.224715368e-05
F18 : 5.420149548e-11
Ne20 : 7.424231652e-07
Ne21 : 4.996678197e-08
Na22 : 2.606751428e-09
Na23 : 1.008351077e-06
Mg24 : 0.0001021744034
Al27 : 0.000554657875
Si28 : 0.03679926049
P31 : 0.04228638044
S32 : 0.03865876461
Ar36 : 0.02465328319
Ca40 : 0.02885085912
Ti44 : 0.001660132226
Cr48 : 0.009959852831
Fe52 : 0.05964589202
Ni56 : 0.2350269888
n : 0.0006075693495
H1 : 0.001992432534
He4 : 0.519007877
C12 : 1.326102e-05
N13 : 9.477376691e-11
N14 : 2.565486854e-09
O16 : 3.225455246e-05
F18 : 5.422064179e-11
Ne20 : 7.427509432e-07
Ne21 : 4.998912611e-08
Na22 : 2.608008186e-09
Na23 : 1.00884295e-06
Mg24 : 0.0001022222509
Al27 : 0.0005548826609
Si28 : 0.03680966846
P31 : 0.04229223791
S32 : 0.03865771985
Ar36 : 0.02464742443
Ca40 : 0.02885152622
Ti44 : 0.00166123105
Cr48 : 0.009967017598
Fe52 : 0.05968005236
Ni56 : 0.2351208159
We're in NSE.
AMReX (23.05-4-ga393d7ff7e32) finalized
AMReX (23.07-395-ge6c93bf22695) finalized

Large diffs are not rendered by default.

52 changes: 26 additions & 26 deletions unit_test/test_nse/ci-benchmarks/aprox21_ci.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Initializing AMReX (23.12-21-gef38229189e3)...
AMReX (23.12-21-gef38229189e3) initialized
Initializing AMReX (23.07-395-ge6c93bf22695)...
AMReX (23.07-395-ge6c93bf22695) initialized
starting the single zone burn...
0x7ffcb5676e40
0x7ffee3940620
State Density (g/cm^3): 277355338.4
State Temperature (K): 5197769252
Mass Fraction (H1): 0.4
Expand All @@ -27,28 +27,28 @@ Mass Fraction (n): 0.01111111111
Mass Fraction (p): 0.01111111111
electron fraction is 0.5
After solving:
chemical potential of proton is -5.582021138
chemical potential of neutron is -12.02471977
chemical potential of proton is -5.581982204
chemical potential of neutron is -12.02475811
NSE state:
H1 : 0
He3 : 6.728066857e-12
He4 : 0.002681009009
C12 : 2.088071581e-08
N14 : 3.632384684e-13
O16 : 8.47594681e-08
Ne20 : 1.345962309e-09
Mg24 : 5.594617425e-07
Si28 : 0.001008098348
S32 : 0.002046870355
Ar36 : 0.002092144335
Ca40 : 0.005240157608
Ti44 : 0.0001839581247
Cr48 : 0.002037991617
Cr56 : 7.751434856e-22
Fe52 : 0.03983919827
Fe54 : 0.04497244709
Fe56 : 1.686482626e-05
Ni56 : 0.8982137413
n : 9.442515318e-10
p : 0.001666851761
AMReX (23.12-21-gef38229189e3) finalized
He3 : 6.726596474e-12
He4 : 0.002680193707
C12 : 2.088656806e-08
N14 : 3.63235315e-13
O16 : 8.472971401e-08
Ne20 : 1.346294741e-09
Mg24 : 5.59648245e-07
Si28 : 0.001008039758
S32 : 0.002046314586
Ar36 : 0.002091462927
Ca40 : 0.005238622254
Ti44 : 0.0001839172999
Cr48 : 0.002037708693
Cr56 : 7.745054363e-22
Fe52 : 0.03983694038
Fe54 : 0.04496220187
Fe56 : 1.685809858e-05
Ni56 : 0.898230601
n : 9.44170726e-10
p : 0.001666471828
AMReX (23.07-395-ge6c93bf22695) finalized
1 change: 1 addition & 0 deletions util/approx_math/Make.package
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CEXE_headers += approx_math.H
84 changes: 84 additions & 0 deletions util/approx_math/approx_math.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#ifndef APPROX_MATH_H
#define APPROX_MATH_H

#include <AMReX_REAL.H>
#include <microphysics_math.H>

using namespace amrex::literals;

AMREX_GPU_HOST_DEVICE AMREX_INLINE
amrex::Real fast_atan_1(const amrex::Real x) {
///
/// This calculates atan within [-1, 1] range.
///

///
/// Approximation of atan for x in [-1, 1]
/// Max absolute error for this is ~0.0015 rad
/// See Ref:
/// Efficient Approximations for the Arctangent Function by Rajan 2006
///

// constexpr amrex::Real PI_4 = GCEM_PI / 4.0_rt;

// return PI_4*x - x*(std::abs(x) - 1.0_rt) *
// (0.2447_rt + 0.0663_rt*std::abs(x));


///
/// Another approximation of atan for x in [-1, 1]
/// Max absolute error for this is ~0.00063 rad
/// See Ref:
/// https://stackoverflow.com/questions/42537957/fast-accurate-atan-arctan-approximation-algorithm
///

constexpr amrex::Real A = 0.0776509570923569_rt;
constexpr amrex::Real B = -0.287434475393028_rt;
constexpr amrex::Real C = GCEM_PI / 4.0_rt - A - B;

amrex::Real x2 = x*x;
return ((A*x2 + B)*x2 + C)*x;
}


AMREX_GPU_HOST_DEVICE AMREX_INLINE
amrex::Real fast_atan(const amrex::Real x) {
///
/// Fast atan approximation calculations.
///

constexpr amrex::Real PI_2 = 0.5_rt * GCEM_PI;

///
/// If x < 0.113, then using arctan(x) ~ x
/// gives better answer than the approximation below.
/// And accuracy increase as x << 0.113.
/// Also significantly faster.
///

if (std::abs(x) < 0.113_rt) {
return x;
}

// Check for large number, close to infinity.
// Error is ~1e-8 rad by not checking actual inf

if (x > 1.e8_rt) {
return PI_2;
}
if (x < -1.e8_rt) {
return -PI_2;
}

// Now calculate Atan(x) using approximations

if (x > 1.0_rt) {
return PI_2 - fast_atan_1(1.0_rt / x);
}
if (x < -1.0_rt) {
return -PI_2 - fast_atan_1(1.0_rt / x);
}
return fast_atan_1(x);
}

#endif
39 changes: 39 additions & 0 deletions util/approx_math/test_fast_atan/GNUmakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
PRECISION = DOUBLE
PROFILE = FALSE

DEBUG = FALSE

DIM = 3

COMP = gnu

USE_MPI = FALSE
USE_OMP = FALSE

USE_REACT = TRUE

EBASE = main

# define the location of the Microphysics top directory
MICROPHYSICS_HOME ?= ../../..

# This sets the EOS directory
EOS_DIR := helmholtz

# This sets the network directory
NETWORK_DIR := aprox21

CONDUCTIVITY_DIR := stellar

INTEGRATOR_DIR = VODE

ifeq ($(USE_CUDA), TRUE)
INTEGRATOR_DIR := VODE
endif

EXTERN_SEARCH += .

Bpack := ../Make.package ./Make.package
Blocs := ../ .

include $(MICROPHYSICS_HOME)/unit_test/Make.unit_test
1 change: 1 addition & 0 deletions util/approx_math/test_fast_atan/Make.package
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CEXE_sources += main.cpp
63 changes: 63 additions & 0 deletions util/approx_math/test_fast_atan/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include <test_fast_atan.H>

int main() {

// Accuracy tests

// Test values including edge cases and some typical values

test_fast_atan_accuracy(0.0_rt);
test_fast_atan_accuracy(0.01_rt);
test_fast_atan_accuracy(0.08_rt);
test_fast_atan_accuracy(0.1_rt);
test_fast_atan_accuracy(0.5_rt);
test_fast_atan_accuracy(1.0_rt);
test_fast_atan_accuracy(5.0_rt);
test_fast_atan_accuracy(8.0_rt);
test_fast_atan_accuracy(10.0_rt);
test_fast_atan_accuracy(25.0_rt);
test_fast_atan_accuracy(100.0_rt);
test_fast_atan_accuracy(500.0_rt);
test_fast_atan_accuracy(5.0e8_rt);

test_fast_atan_accuracy(-0.01_rt);
test_fast_atan_accuracy(-0.08_rt);
test_fast_atan_accuracy(-0.1_rt);
test_fast_atan_accuracy(-0.5_rt);
test_fast_atan_accuracy(-1.0_rt);
test_fast_atan_accuracy(-5.0_rt);
test_fast_atan_accuracy(-8.0_rt);
test_fast_atan_accuracy(-10.0_rt);
test_fast_atan_accuracy(-25.0_rt);
test_fast_atan_accuracy(-100.0_rt);
test_fast_atan_accuracy(-500.0_rt);
test_fast_atan_accuracy(-5.0e8_rt);

// Inf Case

test_fast_atan_accuracy(std::numeric_limits<amrex::Real>::infinity());
test_fast_atan_accuracy(-std::numeric_limits<amrex::Real>::infinity());

std::cout << "Accuracy tests passed!" << std::endl;

// Now performance test

int iters = 5;
amrex::Real test_value = 160.0_rt;
test_fast_atan_speed(100, iters, test_value);

iters = 10;
test_fast_atan_speed(100, iters, test_value);

iters = 20;
test_fast_atan_speed(100, iters, test_value);

iters = 30;
test_fast_atan_speed(100, iters, test_value);

iters = 50;
test_fast_atan_speed(100, iters, test_value);

iters = 70;
test_fast_atan_speed(100, iters, test_value);
}
Loading

0 comments on commit 134200e

Please sign in to comment.