From 06327876e83706d8eecba02a3f4d4695a786cff2 Mon Sep 17 00:00:00 2001 From: Marcin Wojdyr Date: Sun, 29 Oct 2023 20:06:08 +0100 Subject: [PATCH] use modified expapprox() from jhjourdan/SIMD-math-prims for calculating sum of gaussians in in ExpSum and ExpAnisoSum --- docs/hkl.rst | 6 ++-- include/gemmi/formfact.hpp | 62 ++++++++++++++++++++++++++++++++++++++ tests/test_prog.py | 4 +-- 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/docs/hkl.rst b/docs/hkl.rst index b980266d..af49d5a5 100644 --- a/docs/hkl.rst +++ b/docs/hkl.rst @@ -2127,7 +2127,7 @@ into a structure factor grid: >>> sf_grid >>> sf_grid.get_value(3, 4, 5) - (54.536354064941406+53.37517547607422j) + (54.53636932373047+53.37513732910156j) In addition to ``d_min`` and ``rate``, which govern the grid density, DensityCalculator has two more parameters that affect accuracy @@ -2211,7 +2211,7 @@ We either multiply individual values by ``mott_bethe_factor()`` .. doctest:: >>> dc.mott_bethe_factor([3,4,5]) * grid.get_value(3,4,5) - (54.06309366876474+52.971226494748116j) + (54.06278054674026+52.97144710344719j) or we call ``prepare_asu_data()`` with ``mott_bethe=True``: @@ -2220,7 +2220,7 @@ or we call ``prepare_asu_data()`` with ``mott_bethe=True``: >>> asu_data = grid.prepare_asu_data(dmin=2.5, mott_bethe=True, unblur=dencalc.blur) >>> asu_data.value_array[numpy.all(asu_data.miller_array == [3,4,5], axis=1)] - array([54.06309+52.971226j], dtype=complex64) + array([54.06278+52.971447j], dtype=complex64) That is all. If you would like to separate positions of hydrogen nuclei diff --git a/include/gemmi/formfact.hpp b/include/gemmi/formfact.hpp index a19fffcd..3c8374c2 100644 --- a/include/gemmi/formfact.hpp +++ b/include/gemmi/formfact.hpp @@ -13,6 +13,24 @@ namespace gemmi { +// NOTE: the argument x must be between -88 and 88. +// It is based on expapprox() from +// https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h +// Relative error is below 1e-5. +inline float unsafe_expapprox(float x) { + //static float zero = 0.f; // non-const to disable optimization + float val = 12102203.1615614f * x + 1065353216.f; + //val = std::max(val, zero); // check if x < -88.02969 + int32_t vali = static_cast(val); + std::int32_t xu1 = vali & 0x7F800000; + std::int32_t xu2 = (vali & 0x7FFFFF) | 0x3F800000; + float a, b; + std::memcpy(&a, &xu1, 4); + std::memcpy(&b, &xu2, 4); + return a * (0.509871020f + b * (0.312146713f + b * (0.166617139f + b * + (-2.190619930e-3f + b * 1.3555747234e-2f)))); +} + // precalculated density of an isotropic atom template struct ExpSum { @@ -37,6 +55,34 @@ struct ExpSum { } }; +template +struct ExpSum { + float a[N], b[N]; + float calculate(float r2) const { + float density = 0; + float tmp[N]; + for (int i = 0; i < N; ++i) + tmp[i] = std::max(b[i] * r2, -88.f); + for (int i = 0; i < N; ++i) + density += a[i] * unsafe_expapprox(tmp[i]); + return density; + } + + std::pair calculate_with_derivative(float r) const { + float density = 0; + float derivative = 0; + float tmp[N]; + for (int i = 0; i < N; ++i) + tmp[i] = std::max(b[i] * (r * r), -88.f); + for (int i = 0; i < N; ++i) { + float y = a[i] * unsafe_expapprox(tmp[i]); + density += y; + derivative += 2 * b[i] * r * y; + } + return std::make_pair(density, derivative); + } +}; + // precalculated density of an anisotropic atom template struct ExpAnisoSum { @@ -51,6 +97,22 @@ struct ExpAnisoSum { } }; +template +struct ExpAnisoSum { + float a[N]; + SMat33 b[N]; + + float calculate(const Vec3& r) const { + float density = 0; + float tmp[N]; + for (int i = 0; i < N; ++i) + tmp[i] = std::max((float)b[i].r_u_r(r), -88.f); + for (int i = 0; i < N; ++i) + density += a[i] * unsafe_expapprox(tmp[i]); + return density; + } +}; + template constexpr Real pow15(Real x) { return x * std::sqrt(x); } diff --git a/tests/test_prog.py b/tests/test_prog.py index 105daf7a..cfaf9252 100755 --- a/tests/test_prog.py +++ b/tests/test_prog.py @@ -90,7 +90,7 @@ def test_sfcalc_5wkd(self): self.do('''\ $ gemmi sfcalc --blur=12 --dmin=2.5 --rate=2.5 --rcut=1e-7 --test -v tests/5wkd.pdb [...] -RMSE=2.5037e-05 3.544e-05% max|dF|=0.0001476 R=0.000% =3.694e-06 +RMSE=5.7890e-05 8.194e-05% max|dF|=0.0001430 R=0.000% =1.158e-05 ''') # noqa: E501 @unittest.skipIf(sys.platform == 'win32', 'with MSVC it differs slightly') @@ -98,7 +98,7 @@ def test_sfcalc_1pfe(self): self.do('''\ $ gemmi sfcalc --dmin=9 --rate=4 --blur=60 --rcut=1e-7 --test -v tests/1pfe.cif.gz [...] -RMSE=0.00049055 7.721e-05% max|dF|=0.002941 R=0.000% =1.895e-06 +RMSE=0.00050535 7.954e-05% max|dF|=0.002944 R=0.000% =2.615e-06 ''') # noqa: E501 # example from utils.rst