Skip to content

Commit

Permalink
Overload of binary perform_parallel_simd_aligned writes one vector
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul-Licameli committed Oct 10, 2023
1 parent 9ea380e commit 555a719
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
23 changes: 23 additions & 0 deletions libraries/lib-time-and-pitch/StaffPad/SimdTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,29 @@ __finl void perform_parallel_simd_aligned(float *a, float *b, int n, const fnc &
f(a[i], b[i]);
}

// binary variant: both buffers are read, results go back into the first only
template <typename fnc>
__finl void perform_parallel_simd_aligned(float *a, const float *b, int n, const fnc &f)
{
  // f is expected to be a generic lambda shaped like
  // [](auto &a, const auto &b){}; it is invoked with float_x4 operands in
  // the packet loop and with plain float operands in the scalar tail.
  constexpr int lanes = 4;
  constexpr int alignment = lanes * static_cast<int>(sizeof(float));

  assert(is_aligned(a, alignment) && is_aligned(b, alignment));

  // whole packets of `lanes` floats: load both, apply f, store the first back
  int pos = 0;
  while (pos <= n - lanes)
  {
    float *const dst = a + pos;
    auto lhs = float_x4_load_aligned(dst);
    auto rhs = float_x4_load_aligned(b + pos);
    f(lhs, rhs);
    store_aligned(lhs, dst);
    pos += lanes;
  }

  // remaining elements, one at a time, starting from n with its low
  // log2(lanes) bits cleared
  const int tailBegin = n & ~(lanes - 1);
  for (int k = tailBegin; k < n; ++k)
  {
    f(a[k], b[k]);
  }
}

/// template for applying math to one data buffer
template <typename fnc>
__finl void perform_parallel_simd_aligned(float *a, int n, const fnc &f)
Expand Down
2 changes: 1 addition & 1 deletion libraries/lib-time-and-pitch/StaffPad/TimeAndPitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ void _ms_to_lr(float* ch1, float* ch2, int n)
// In-place product of two buffers: passes dst, src and n to
// audio::simd::perform_parallel_simd_aligned together with a lambda that
// performs d = d * s. The lambda assigns only to its first argument (taken
// from dst); its second argument (taken from src) is only read.
// NOTE(review): n is presumably the element count of both buffers — confirm
// against callers.
template <class T>
inline void multiply(T* dst, const T* src, int32_t n)
{
audio::simd::perform_parallel_simd_aligned(dst, const_cast<T*>(src), n,
audio::simd::perform_parallel_simd_aligned(dst, src, n,
[](auto& d, auto& s) { d = d * s; });
}
} // namespace
Expand Down

0 comments on commit 555a719

Please sign in to comment.