Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to enhance performance of FNT(257) #264

Merged
merged 6 commits into from
Mar 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions scripts/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,23 @@ fi
chunk_size=51200
# for rs-fnt with different packet sizes
word_size=2
for ec_type in rs-fnt rs-fnt-sys; do
for k in 16; do
for n in 256 1024; do
m=$((n-k))
for pkt_size in 256 512 1024; do
${bin} -e ${ec_type} -w ${word_size} -k ${k} -m ${m} -c ${chunk_size} -s ${sce_type} -g ${threads_nb} -f ${show_type} -p ${pkt_size} -n ${samples_nb}
done
for word_size in 1 2; do
for type_size in 2 4; do
max_len=$((256**word_size))
if ((type_size>word_size)); then
for ec_type in rs-fnt rs-fnt-sys; do
for k in 16 64; do
for n in 32 256 1024; do
if ((n<max_len)) && ((n>k)); then
m=$((n-k))
for pkt_size in 512; do
${bin} -e ${ec_type} -w ${word_size} -t ${type_size} -k ${k} -m ${m} -c ${chunk_size} -s ${sce_type} -g ${threads_nb} -f ${show_type} -p ${pkt_size} -n ${samples_nb}
show_type=0
done
fi
done
done
done
fi
done
done
done
115 changes: 64 additions & 51 deletions src/fec_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,14 @@ class FecCode {
* if NON_SYSTEMATIC get_n_outputs()
*/
virtual void decode(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Vector<T>& output,
const std::vector<Properties>& props,
off_t offset,
vec::Vector<T>& words);

virtual void decode(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Buffers<T>& output,
const std::vector<Properties>& props,
off_t offset,
Expand All @@ -189,18 +189,19 @@ class FecCode {
bool decode_streams_horizontal(
std::vector<std::istream*> input_data_bufs,
std::vector<std::istream*> input_parities_bufs,
const std::vector<Properties>& input_parities_props,
std::vector<Properties>& input_parities_props,
std::vector<std::ostream*> output_data_bufs);

virtual std::unique_ptr<DecodeContext<T>> init_context_dec(
vec::Vector<T>& fragments_ids,
std::vector<Properties>& input_props,
size_t size = 0,
vec::Buffers<T>* output = nullptr);

bool decode_streams_vertical(
std::vector<std::istream*> input_data_bufs,
std::vector<std::istream*> input_parities_bufs,
const std::vector<Properties>& input_parities_props,
std::vector<Properties>& input_parities_props,
std::vector<std::ostream*> output_data_bufs);

void encode_blocks_vertical(
Expand All @@ -213,7 +214,7 @@ class FecCode {
bool decode_blocks_vertical(
std::vector<uint8_t*> data_bufs,
std::vector<uint8_t*> parities_bufs,
const std::vector<Properties>& parities_props,
std::vector<Properties>& parities_props,
std::vector<int> missing_idxs,
std::vector<bool> wanted_idxs,
size_t block_size_bytes);
Expand Down Expand Up @@ -270,24 +271,24 @@ class FecCode {
}

virtual void decode_prepare(
const DecodeContext<T>& context,
DecodeContext<T>& context,
const std::vector<Properties>& props,
off_t offset,
vec::Vector<T>& words);

virtual void decode_apply(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Vector<T>& output,
vec::Vector<T>& words);

virtual void decode_prepare(
const DecodeContext<T>& context,
DecodeContext<T>& context,
const std::vector<Properties>& props,
off_t offset,
vec::Buffers<T>& words);

virtual void decode_apply(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Buffers<T>& output,
vec::Buffers<T>& words);
};
Expand Down Expand Up @@ -440,16 +441,12 @@ void FecCode<T>::encode_streams_horizontal(
if (!cont)
break;

// std::cout << "words at " << offset << ": "; words.dump();

timeval t1 = tick();
uint64_t start = hw_timer();
encode(output, output_parities_props, offset, words);
uint64_t end = hw_timer();
uint64_t t2 = hrtime_usec(t1);

// std::cout << "output: "; output.dump();

total_enc_usec += t2;
total_encode_cycles += (end - start) / word_size;
n_encode_ops++;
Expand Down Expand Up @@ -563,7 +560,7 @@ template <typename T>
bool FecCode<T>::decode_streams_horizontal(
std::vector<std::istream*> input_data_bufs,
std::vector<std::istream*> input_parities_bufs,
const std::vector<Properties>& input_parities_props,
std::vector<Properties>& input_parities_props,
std::vector<std::ostream*> output_data_bufs)
{
off_t offset = 0;
Expand Down Expand Up @@ -630,7 +627,8 @@ bool FecCode<T>::decode_streams_horizontal(
vec::Vector<T> words(*(this->gf), n_words);
vec::Vector<T> output(*(this->gf), n_data);

std::unique_ptr<DecodeContext<T>> context = init_context_dec(fragments_ids);
std::unique_ptr<DecodeContext<T>> context =
init_context_dec(fragments_ids, input_parities_props);
while (true) {
words.zero_fill();
if (type == FecType::SYSTEMATIC) {
Expand Down Expand Up @@ -740,7 +738,7 @@ bool FecCode<T>::decode_streams_horizontal(
*/
template <typename T>
void FecCode<T>::decode(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Vector<T>& output,
const std::vector<Properties>& props,
off_t offset,
Expand All @@ -759,6 +757,7 @@ void FecCode<T>::decode(
template <typename T>
std::unique_ptr<DecodeContext<T>> FecCode<T>::init_context_dec(
vec::Vector<T>& fragments_ids,
std::vector<Properties>& input_props,
size_t size,
vec::Buffers<T>* output)
{
Expand All @@ -780,30 +779,42 @@ std::unique_ptr<DecodeContext<T>> FecCode<T>::init_context_dec(
}

return std::make_unique<DecodeContext<T>>(
*gf, *fft, *fft_2k, fragments_ids, vx, n_data, n, -1, size, output);
*gf,
*fft,
*fft_2k,
fragments_ids,
input_props,
vx,
n_data,
n,
-1,
size,
output);
}

/* Prepare for decoding
* It supports FEC using multiplicative FFT over FNT
*/
template <typename T>
void FecCode<T>::decode_prepare(
const DecodeContext<T>& context,
DecodeContext<T>& context,
const std::vector<Properties>& props,
off_t offset,
vec::Vector<T>& words)
{
const vec::Vector<T>& fragments_ids = context.get_fragments_id();
for (unsigned i = 0; i < this->n_data; ++i) {
const int j = fragments_ids.get(i);
auto data = props[j].get(offset);

// Check if the symbol is a special case which is marked by `OOR_MARK`,
// i.e. true. Note: this check is necessary when word_size is not large
// enough to cover all symbols of the field. Following check is used for
// FFT over FNT where the single special case symbol equals card - 1
if (data == OOR_MARK) {
words.set(i, this->gf->card() - 1);
if (props[j].is_marked(context.props_indices[j], offset)) {
// Check if the symbol is a special case which is marked by
// `OOR_MARK`, i.e. true. Note: this check is necessary when
// word_size is not large enough to cover all symbols of the field.
// Following check is used for FFT over FNT where the single special
// case symbol equals card - 1
if (props[j].marker(context.props_indices[j]) == OOR_MARK) {
words.set(i, this->gf->card() - 1);
}
context.props_indices.at(j)++;
}
}
}
Expand All @@ -818,7 +829,7 @@ void FecCode<T>::decode_prepare(
*/
template <typename T>
void FecCode<T>::decode_apply(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Vector<T>& output,
vec::Vector<T>& words)
{
Expand Down Expand Up @@ -887,7 +898,7 @@ template <typename T>
bool FecCode<T>::decode_streams_vertical(
std::vector<std::istream*> input_data_bufs,
std::vector<std::istream*> input_parities_bufs,
const std::vector<Properties>& input_parities_props,
std::vector<Properties>& input_parities_props,
std::vector<std::ostream*> output_data_bufs)
{
bool cont = true;
Expand Down Expand Up @@ -962,8 +973,8 @@ bool FecCode<T>::decode_streams_vertical(
vec::Buffers<char> output_char(output_len, buf_size);
const std::vector<char*> output_mem_char = output_char.get_mem();

std::unique_ptr<DecodeContext<T>> context =
init_context_dec(fragments_ids, pkt_size, &output);
std::unique_ptr<DecodeContext<T>> context = init_context_dec(
fragments_ids, input_parities_props, pkt_size, &output);

reset_stats_dec();

Expand Down Expand Up @@ -1166,7 +1177,7 @@ template <typename T>
bool FecCode<T>::decode_blocks_vertical(
std::vector<uint8_t*> data_bufs,
std::vector<uint8_t*> parities_bufs,
const std::vector<Properties>& parities_props,
std::vector<Properties>& parities_props,
std::vector<int> missing_idxs,
std::vector<bool> wanted_idxs,
size_t block_size_bytes)
Expand Down Expand Up @@ -1243,7 +1254,7 @@ bool FecCode<T>::decode_blocks_vertical(
const std::vector<uint8_t*> output_mem_char = output_char.get_mem();

std::unique_ptr<DecodeContext<T>> context =
init_context_dec(fragments_ids, pkt_size, &output);
init_context_dec(fragments_ids, parities_props, pkt_size, &output);

reset_stats_dec();

Expand Down Expand Up @@ -1317,14 +1328,13 @@ bool FecCode<T>::decode_blocks_vertical(
*
* @param context decoding context
* @param output must be exactly n_data
* @param props special values dictionary must be exactly n_data
* @param offset used to locate special values
* @param words vector \f$v=(v_0, v_1, ..., v_{k-1})\f$, \f$k\f$ must be exactly
* n_data
*/
template <typename T>
void FecCode<T>::decode(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Buffers<T>& output,
const std::vector<Properties>& props,
off_t offset,
Expand All @@ -1349,7 +1359,7 @@ void FecCode<T>::decode(
*/
template <typename T>
void FecCode<T>::decode_prepare(
const DecodeContext<T>& context,
DecodeContext<T>& context,
const std::vector<Properties>& props,
off_t offset,
vec::Buffers<T>& words)
Expand All @@ -1369,23 +1379,26 @@ void FecCode<T>::decode_prepare(
if (type == FecType::SYSTEMATIC) {
frag_id -= this->n_data;
}

// loop over marked symbols
for (auto const& data : props[frag_id].get_map()) {
off_t loc_offset = data.first;
if (loc_offset >= offset && loc_offset < offset_max) {
// As loc.offset := offset + j
const size_t j = (loc_offset - offset);

// Check if the symbol is a special case which is marked by
// `OOR_MARK`.
// Note: this check is necessary when word_size is not large
// enough to cover all symbols of the field. Following check is
// used for FFT over FNT where the single special case symbol
// equals card - 1
if (data.second == OOR_MARK) {
chunk[j] = thres;
}
while (props[frag_id].in_range(
context.props_indices.at(frag_id), offset, offset_max)) {
const size_t loc_offset =
props[frag_id].location(context.props_indices.at(frag_id));
// As loc.offset := offset + j
const size_t j = (loc_offset - offset);

// Check if the symbol is a special case which is marked by
// `OOR_MARK`.
// Note: this check is necessary when word_size is not large
// enough to cover all symbols of the field. Following check is
// used for FFT over FNT where the single special case symbol
// equals card - 1
if (props[frag_id].marker(context.props_indices.at(frag_id))
== OOR_MARK) {
chunk[j] = thres;
}
context.props_indices.at(frag_id)++;
}
}
}
Expand All @@ -1403,7 +1416,7 @@ void FecCode<T>::decode_prepare(
*/
template <typename T>
void FecCode<T>::decode_apply(
const DecodeContext<T>& context,
DecodeContext<T>& context,
vec::Buffers<T>& output,
vec::Buffers<T>& words)
{
Expand Down
10 changes: 10 additions & 0 deletions src/fec_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "fft_base.h"
#include "gf_base.h"
#include "gf_nf4.h"
#include "property.h"
lamphamsy marked this conversation as resolved.
Show resolved Hide resolved
#include "vec_poly.h"
#include "vec_zero_ext.h"

Expand All @@ -67,12 +68,14 @@ class DecodeContext {
fft::FourierTransform<T>& fft,
fft::FourierTransform<T>& fft_2k,
const vec::Vector<T>& fragments_ids,
std::vector<Properties>& input_props,
const vec::Vector<T>& vx,
const int k,
const int n,
int vx_zero = -1,
const size_t size = 0,
vec::Buffers<T>* output = nullptr)
: props_indices(input_props.size(), 0)
{
this->k = k;
this->n = n;
Expand All @@ -87,6 +90,12 @@ class DecodeContext {

this->fragments_ids = &fragments_ids;

for (auto& props : input_props) {
// Sort properties on the basis of location of pairs in ascending
// order.
props.sort();
}

A = std::make_unique<vec::Poly<T>>(gf, n);
A_fft_2k = std::make_unique<vec::Vector<T>>(gf, len_2k);
inv_A_i = std::make_unique<vec::Vector<T>>(gf, k);
Expand Down Expand Up @@ -266,6 +275,7 @@ class DecodeContext {

public:
int vx_zero;
std::vector<size_t> props_indices;

private:
unsigned k;
Expand Down
Loading