Skip to content

Commit

Permalink
Merge pull request #254 from scality/eh/rework_simd
Browse files Browse the repository at this point in the history
Part2: Vectorize operations for Radix-2 FFT & re-work vectorization part
  • Loading branch information
lamphamsy authored Dec 20, 2018
2 parents 1cbb261 + 9c47e18 commit 1171bdb
Show file tree
Hide file tree
Showing 17 changed files with 1,742 additions and 1,734 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ include(GNUInstallDirs)
set(LIB_SRC
${SOURCE_DIR}/core.cpp
${SOURCE_DIR}/fec_vectorisation.cpp
${SOURCE_DIR}/fft_2n.cpp
${SOURCE_DIR}/misc.cpp
${SOURCE_DIR}/gf_nf4.cpp
${SOURCE_DIR}/gf_ring.cpp
Expand Down
2 changes: 0 additions & 2 deletions src/fec_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ static inline uint64_t hrtime_usec(timeval begin)
return 1000000 * (tv.tv_sec - begin.tv_sec) + tv.tv_usec - begin.tv_usec;
}

#define OOR_MARK 1

enum class FecType {
/** Systematic code
*
Expand Down
11 changes: 11 additions & 0 deletions src/fec_rs_fnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class RsFnt : public FecCode<T> {
// decoding context used in encoding of systematic FNT
std::unique_ptr<DecodeContext<T>> enc_context;

// Indices used for accelerated functions
size_t simd_vec_len;
size_t simd_trailing_len;
size_t simd_offset;

public:
RsFnt(
FecType type,
Expand All @@ -70,6 +75,12 @@ class RsFnt : public FecCode<T> {
: FecCode<T>(type, word_size, n_data, n_parities, pkt_size)
{
this->fec_init();

// Indices used for accelerated functions
const unsigned ratio = simd::countof<T>();
simd_vec_len = this->pkt_size / ratio;
simd_trailing_len = this->pkt_size - simd_vec_len * ratio;
simd_offset = simd_vec_len * ratio;
}

inline void check_params() override
Expand Down
28 changes: 7 additions & 21 deletions src/fec_vectorisation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "fec_rs_fnt.h"

/*
* The file includes vectorized operations used by FEC classes
* The file includes specialized operations used by FEC classes
*/

#ifdef QUADIRON_USE_SIMD
Expand All @@ -53,20 +53,13 @@ void RsFnt<uint16_t>::encode_post_process(
uint16_t threshold = this->gf->card_minus_one();
unsigned code_len = this->n_outputs;

// number of elements per vector register
unsigned vec_size = simd::countof<uint16_t>();
// number of vector registers per fragment packet
size_t vecs_nb = size / vec_size;
// odd number of elements not vectorized
size_t last_len = size - vecs_nb * vec_size;

simd::encode_post_process(
output, props, offset, code_len, threshold, vecs_nb);
output, props, offset, code_len, threshold, simd_vec_len);

if (last_len > 0) {
if (simd_trailing_len > 0) {
for (unsigned i = 0; i < code_len; ++i) {
uint16_t* chunk = output.get(i);
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
for (size_t j = simd_offset; j < size; ++j) {
if (chunk[j] == threshold) {
props[i].add(offset + j, OOR_MARK);
}
Expand All @@ -85,20 +78,13 @@ void RsFnt<uint32_t>::encode_post_process(
const uint32_t threshold = this->gf->card_minus_one();
const unsigned code_len = this->n_outputs;

// number of elements per vector register
const unsigned vec_size = simd::countof<uint32_t>();
// number of vector registers per fragment packet
const size_t vecs_nb = size / vec_size;
// odd number of elements not vectorized
const size_t last_len = size - vecs_nb * vec_size;

simd::encode_post_process(
output, props, offset, code_len, threshold, vecs_nb);
output, props, offset, code_len, threshold, simd_vec_len);

if (last_len > 0) {
if (simd_trailing_len > 0) {
for (unsigned i = 0; i < code_len; ++i) {
uint32_t* chunk = output.get(i);
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
for (size_t j = simd_offset; j < size; ++j) {
if (chunk[j] == threshold) {
props[i].add(offset + j, OOR_MARK);
}
Expand Down
192 changes: 192 additions & 0 deletions src/fft_2n.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/* -*- mode: c++ -*- */
/*
* Copyright 2017-2018 Scality
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include "fft_2n.h"

/*
* This file contains the vectorized specializations of the Radix2 butterfly operations
*/

#ifdef QUADIRON_USE_SIMD

#include "simd.h"

namespace quadiron {
namespace fft {

/**
 * Vectorized specialization of the two-layers CT butterfly step for
 * `uint16_t` elements.
 *
 * Three coefficients are read from `vec_W` at indices derived from `start`,
 * `m` and `n`, then handed to the SIMD routine together with `simd_vec_len`
 * and `card`. When `simd_trailing_len` is non-zero, the last elements
 * (from `simd_offset` onwards) are processed by the non-SIMD variant.
 *
 * @param buf buffers the butterfly operates on
 * @param start start index, also used to derive the coefficient index
 * @param m forwarded to both the SIMD and the non-SIMD routine
 */
template <>
void Radix2<uint16_t>::butterfly_ct_two_layers_step(
    vec::Buffers<uint16_t>& buf,
    unsigned start,
    unsigned m)
{
    // Coefficient lookups: r2 and r3 share the same halved base index.
    const unsigned w_idx = start * this->n / m / 2;
    const unsigned half_idx = w_idx / 2;
    const uint16_t r1 = vec_W[w_idx];
    const uint16_t r2 = vec_W[half_idx];
    const uint16_t r3 = vec_W[half_idx + this->n / 4];

    // Vectorized part.
    simd::butterfly_ct_two_layers_step(
        buf, r1, r2, r3, start, m, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_ct_two_layers_step_slow(buf, start, m, simd_offset);
}

/**
 * Vectorized specialization of the CT butterfly step for `uint16_t`
 * elements.
 *
 * The SIMD routine is invoked first with `simd_vec_len` and `card`; when
 * `simd_trailing_len` is non-zero, the last elements (from `simd_offset`
 * onwards) are processed by the non-SIMD variant.
 *
 * @param buf buffers the butterfly operates on
 * @param r coefficient forwarded to both routines
 * @param start forwarded to both routines
 * @param m forwarded to both routines
 * @param step forwarded to both routines
 */
template <>
void Radix2<uint16_t>::butterfly_ct_step(
    vec::Buffers<uint16_t>& buf,
    uint16_t r,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized part.
    simd::butterfly_ct_step(buf, r, start, m, step, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_ct_step_slow(buf, r, start, m, step, simd_offset);
}

/**
 * Vectorized specialization of the GS butterfly step for `uint16_t`
 * elements.
 *
 * The SIMD routine is invoked first with `simd_vec_len` and `card` (it does
 * not receive `step`); when `simd_trailing_len` is non-zero, the last
 * elements (from `simd_offset` onwards) are processed by the non-SIMD
 * variant, which does receive `step`.
 *
 * @param buf buffers the butterfly operates on
 * @param coef coefficient forwarded to both routines
 * @param start forwarded to both routines
 * @param m forwarded to both routines
 * @param step forwarded to the non-SIMD routine only
 */
template <>
void Radix2<uint16_t>::butterfly_gs_step(
    vec::Buffers<uint16_t>& buf,
    uint16_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized part.
    simd::butterfly_gs_step(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_gs_step_slow(buf, coef, start, m, step, simd_offset);
}

/**
 * Vectorized specialization of the simple GS butterfly step for `uint16_t`
 * elements.
 *
 * The SIMD routine is invoked first with `simd_vec_len` and `card` (it does
 * not receive `step`); when `simd_trailing_len` is non-zero, the last
 * elements (from `simd_offset` onwards) are processed by the non-SIMD
 * variant, which does receive `step`.
 *
 * @param buf buffers the butterfly operates on
 * @param coef coefficient forwarded to both routines
 * @param start forwarded to both routines
 * @param m forwarded to both routines
 * @param step forwarded to the non-SIMD routine only
 */
template <>
void Radix2<uint16_t>::butterfly_gs_step_simple(
    vec::Buffers<uint16_t>& buf,
    uint16_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized part.
    simd::butterfly_gs_step_simple(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_gs_step_simple_slow(buf, coef, start, m, step, simd_offset);
}

/**
 * Vectorized specialization of the two-layers CT butterfly step for
 * `uint32_t` elements.
 *
 * Three coefficients are read from `vec_W` at indices derived from `start`,
 * `m` and `n`, then handed to the SIMD routine together with `simd_vec_len`
 * and `card`. When `simd_trailing_len` is non-zero, the last elements
 * (from `simd_offset` onwards) are processed by the non-SIMD variant.
 *
 * @param buf buffers the butterfly operates on
 * @param start start index, also used to derive the coefficient index
 * @param m forwarded to both the SIMD and the non-SIMD routine
 */
template <>
void Radix2<uint32_t>::butterfly_ct_two_layers_step(
    vec::Buffers<uint32_t>& buf,
    unsigned start,
    unsigned m)
{
    // Coefficient lookups: r2 and r3 share the same halved base index.
    const unsigned w_idx = start * this->n / m / 2;
    const unsigned half_idx = w_idx / 2;
    const uint32_t r1 = vec_W[w_idx];
    const uint32_t r2 = vec_W[half_idx];
    const uint32_t r3 = vec_W[half_idx + this->n / 4];

    // Vectorized part.
    simd::butterfly_ct_two_layers_step(
        buf, r1, r2, r3, start, m, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_ct_two_layers_step_slow(buf, start, m, simd_offset);
}

/**
 * Vectorized specialization of the CT butterfly step for `uint32_t`
 * elements.
 *
 * The SIMD routine is invoked first with `simd_vec_len` and `card`; when
 * `simd_trailing_len` is non-zero, the last elements (from `simd_offset`
 * onwards) are processed by the non-SIMD variant.
 *
 * @param buf buffers the butterfly operates on
 * @param r coefficient forwarded to both routines
 * @param start forwarded to both routines
 * @param m forwarded to both routines
 * @param step forwarded to both routines
 */
template <>
void Radix2<uint32_t>::butterfly_ct_step(
    vec::Buffers<uint32_t>& buf,
    uint32_t r,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized part.
    simd::butterfly_ct_step(buf, r, start, m, step, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_ct_step_slow(buf, r, start, m, step, simd_offset);
}

/**
 * Vectorized specialization of the GS butterfly step for `uint32_t`
 * elements.
 *
 * The SIMD routine is invoked first with `simd_vec_len` and `card` (it does
 * not receive `step`); when `simd_trailing_len` is non-zero, the last
 * elements (from `simd_offset` onwards) are processed by the non-SIMD
 * variant, which does receive `step`.
 *
 * @param buf buffers the butterfly operates on
 * @param coef coefficient forwarded to both routines
 * @param start forwarded to both routines
 * @param m forwarded to both routines
 * @param step forwarded to the non-SIMD routine only
 */
template <>
void Radix2<uint32_t>::butterfly_gs_step(
    vec::Buffers<uint32_t>& buf,
    uint32_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized part.
    simd::butterfly_gs_step(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_gs_step_slow(buf, coef, start, m, step, simd_offset);
}

/**
 * Vectorized specialization of the simple GS butterfly step for `uint32_t`
 * elements.
 *
 * The SIMD routine is invoked first with `simd_vec_len` and `card` (it does
 * not receive `step`); when `simd_trailing_len` is non-zero, the last
 * elements (from `simd_offset` onwards) are processed by the non-SIMD
 * variant, which does receive `step`.
 *
 * @param buf buffers the butterfly operates on
 * @param coef coefficient forwarded to both routines
 * @param start forwarded to both routines
 * @param m forwarded to both routines
 * @param step forwarded to the non-SIMD routine only
 */
template <>
void Radix2<uint32_t>::butterfly_gs_step_simple(
    vec::Buffers<uint32_t>& buf,
    uint32_t coef,
    unsigned start,
    unsigned m,
    unsigned step)
{
    // Vectorized part.
    simd::butterfly_gs_step_simple(buf, coef, start, m, simd_vec_len, card);

    // Nothing left over: the SIMD routine covered everything.
    if (simd_trailing_len == 0) {
        return;
    }

    // Last elements are handled by the non-SIMD method.
    butterfly_gs_step_simple_slow(buf, coef, start, m, step, simd_offset);
}

} // namespace fft
} // namespace quadiron

#endif // #ifdef QUADIRON_USE_SIMD
Loading

0 comments on commit 1171bdb

Please sign in to comment.