Skip to content

Commit

Permalink
use modified expapprox() from jhjourdan/SIMD-math-prims
Browse files Browse the repository at this point in the history
for calculating sum of gaussians in in ExpSum<N, float>
and ExpAnisoSum<N, float>
  • Loading branch information
wojdyr committed Oct 29, 2023
1 parent d2480af commit 0632787
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 5 deletions.
6 changes: 3 additions & 3 deletions docs/hkl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2127,7 +2127,7 @@ into a structure factor grid:
>>> sf_grid
<gemmi.ReciprocalComplexGrid(48, 50, 50)>
>>> sf_grid.get_value(3, 4, 5)
(54.536354064941406+53.37517547607422j)
(54.53636932373047+53.37513732910156j)

In addition to ``d_min`` and ``rate``, which govern the grid density,
DensityCalculator has two more parameters that affect accuracy
Expand Down Expand Up @@ -2211,7 +2211,7 @@ We either multiply individual values by ``mott_bethe_factor()``
.. doctest::

>>> dc.mott_bethe_factor([3,4,5]) * grid.get_value(3,4,5)
(54.06309366876474+52.971226494748116j)
(54.06278054674026+52.97144710344719j)

or we call ``prepare_asu_data()`` with ``mott_bethe=True``:

Expand All @@ -2220,7 +2220,7 @@ or we call ``prepare_asu_data()`` with ``mott_bethe=True``:

>>> asu_data = grid.prepare_asu_data(dmin=2.5, mott_bethe=True, unblur=dencalc.blur)
>>> asu_data.value_array[numpy.all(asu_data.miller_array == [3,4,5], axis=1)]
array([54.06309+52.971226j], dtype=complex64)
array([54.06278+52.971447j], dtype=complex64)

That is all.
If you would like to separate positions of hydrogen nuclei
Expand Down
62 changes: 62 additions & 0 deletions include/gemmi/formfact.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,24 @@

namespace gemmi {

// NOTE: the argument x must be between -88 and 88.
// It is based on expapprox() from
// https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h
// Relative error is below 1e-5.
inline float unsafe_expapprox(float x) {
//static float zero = 0.f; // non-const to disable optimization
float val = 12102203.1615614f * x + 1065353216.f;
//val = std::max(val, zero); // check if x < -88.02969
int32_t vali = static_cast<std::int32_t>(val);
std::int32_t xu1 = vali & 0x7F800000;
std::int32_t xu2 = (vali & 0x7FFFFF) | 0x3F800000;
float a, b;
std::memcpy(&a, &xu1, 4);
std::memcpy(&b, &xu2, 4);
return a * (0.509871020f + b * (0.312146713f + b * (0.166617139f + b *
(-2.190619930e-3f + b * 1.3555747234e-2f))));
}

// precalculated density of an isotropic atom
template<int N, typename Real>
struct ExpSum {
Expand All @@ -37,6 +55,34 @@ struct ExpSum {
}
};

template<int N>
struct ExpSum<N, float> {
float a[N], b[N];
float calculate(float r2) const {
float density = 0;
float tmp[N];
for (int i = 0; i < N; ++i)
tmp[i] = std::max(b[i] * r2, -88.f);
for (int i = 0; i < N; ++i)
density += a[i] * unsafe_expapprox(tmp[i]);
return density;
}

std::pair<float,float> calculate_with_derivative(float r) const {
float density = 0;
float derivative = 0;
float tmp[N];
for (int i = 0; i < N; ++i)
tmp[i] = std::max(b[i] * (r * r), -88.f);
for (int i = 0; i < N; ++i) {
float y = a[i] * unsafe_expapprox(tmp[i]);
density += y;
derivative += 2 * b[i] * r * y;
}
return std::make_pair(density, derivative);
}
};

// precalculated density of an anisotropic atom
template<int N, typename Real>
struct ExpAnisoSum {
Expand All @@ -51,6 +97,22 @@ struct ExpAnisoSum {
}
};

template<int N>
struct ExpAnisoSum<N, float> {
float a[N];
SMat33<float> b[N];

float calculate(const Vec3& r) const {
float density = 0;
float tmp[N];
for (int i = 0; i < N; ++i)
tmp[i] = std::max((float)b[i].r_u_r(r), -88.f);
for (int i = 0; i < N; ++i)
density += a[i] * unsafe_expapprox(tmp[i]);
return density;
}
};

template<typename Real>
constexpr Real pow15(Real x) { return x * std::sqrt(x); }

Expand Down
4 changes: 2 additions & 2 deletions tests/test_prog.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ def test_sfcalc_5wkd(self):
self.do('''\
$ gemmi sfcalc --blur=12 --dmin=2.5 --rate=2.5 --rcut=1e-7 --test -v tests/5wkd.pdb
[...]
RMSE=2.5037e-05 3.544e-05% max|dF|=0.0001476 R=0.000% <dPhi>=3.694e-06
RMSE=5.7890e-05 8.194e-05% max|dF|=0.0001430 R=0.000% <dPhi>=1.158e-05
''') # noqa: E501

@unittest.skipIf(sys.platform == 'win32', 'with MSVC it differs slightly')
def test_sfcalc_1pfe(self):
self.do('''\
$ gemmi sfcalc --dmin=9 --rate=4 --blur=60 --rcut=1e-7 --test -v tests/1pfe.cif.gz
[...]
RMSE=0.00049055 7.721e-05% max|dF|=0.002941 R=0.000% <dPhi>=1.895e-06
RMSE=0.00050535 7.954e-05% max|dF|=0.002944 R=0.000% <dPhi>=2.615e-06
''') # noqa: E501

# example from utils.rst
Expand Down

0 comments on commit 0632787

Please sign in to comment.