Skip to content

Commit

Permalink
Merge pull request #215 from scality/ft/nf4_packet
Browse files Browse the repository at this point in the history
RS NF4: support encoding/decoding using packets
  • Loading branch information
lamphamsy authored Aug 24, 2018
2 parents 4201c58 + d78fe68 commit 73766c8
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 40 deletions.
5 changes: 3 additions & 2 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ int Benchmark<T>::init()
fec = new quadiron::fec::RsGfpFft<T>(word_size, k, m);
break;
case EC_TYPE_RS_NF4:
fec = new quadiron::fec::RsNf4<T>(word_size, k, m);
fec = new quadiron::fec::RsNf4<T>(word_size, k, m, pkt_size);
break;
case EC_TYPE_RS_FNT:
fec = new quadiron::fec::RsFnt<T>(word_size, k, m, pkt_size);
Expand Down Expand Up @@ -787,7 +787,8 @@ int main(int argc, char** argv)
}

// Operating on packets is currently supported only for RS_FNT and RS_NF4
if (params->fec_type != EC_TYPE_RS_FNT) {
if (params->fec_type != EC_TYPE_RS_FNT
&& params->fec_type != EC_TYPE_RS_NF4) {
params->operation_on_packet = false;
}

Expand Down
118 changes: 112 additions & 6 deletions src/fec_rs_nf4.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,17 @@ namespace fec {
template <typename T>
class RsNf4 : public FecCode<T> {
public:
RsNf4(unsigned word_size, unsigned n_data, unsigned n_parities)
: FecCode<T>(FecType::NON_SYSTEMATIC, word_size, n_data, n_parities)
RsNf4(
unsigned word_size,
unsigned n_data,
unsigned n_parities,
size_t pkt_size = 8)
: FecCode<T>(
FecType::NON_SYSTEMATIC,
word_size,
n_data,
n_parities,
pkt_size)
{
this->fec_init();
}
Expand Down Expand Up @@ -80,14 +89,14 @@ class RsNf4 : public FecCode<T> {

int m = arith::get_smallest_power_of_2<int>(this->n_data);
this->fft = std::unique_ptr<fft::Radix2<T>>(
new fft::Radix2<T>(*ngff4, this->n, m));
new fft::Radix2<T>(*ngff4, this->n, m, this->pkt_size));

this->fft_full = std::unique_ptr<fft::Radix2<T>>(
new fft::Radix2<T>(*ngff4, this->n));
new fft::Radix2<T>(*ngff4, this->n, this->n, this->pkt_size));

unsigned len_2k = this->gf->get_code_len_high_compo(2 * this->n_data);
this->fft_2k = std::unique_ptr<fft::Radix2<T>>(
new fft::Radix2<T>(*ngff4, len_2k, len_2k));
new fft::Radix2<T>(*ngff4, len_2k, len_2k, this->pkt_size));
}

inline void init_others() override
Expand Down Expand Up @@ -134,9 +143,10 @@ class RsNf4 : public FecCode<T> {
vec::ZeroExtended<T> vwords(words, this->n);
this->fft->fft(output, &vwords);
// std::cout << "encoded:"; output->dump();
GroupedValues<T> true_val;
for (unsigned i = 0; i < this->code_len; i++) {
T val = output->get(i);
GroupedValues<T> true_val = ngff4->unpack(val);
ngff4->unpack(val, true_val);
if (true_val.flag > 0) {
props[i].add(
ValueLocation(offset, i), std::to_string(true_val.flag));
Expand Down Expand Up @@ -247,6 +257,102 @@ class RsNf4 : public FecCode<T> {
output->set(i, ngff4->unpack(output->get(i)).values);
}
}

/********** Encoding & Decoding using Buffers **********/

/**
 * Encode one packet of symbols per fragment.
 *
 * The first `n_data` buffers of `words` are packed IN PLACE (the caller's
 * input is overwritten with the packed representation), transformed with
 * `this->fft`, and every symbol of the first `code_len` output buffers is
 * then unpacked back into `output`.
 *
 * @param output buffers receiving the encoded fragments
 * @param props per-fragment properties; a non-zero unpack flag is recorded
 *              at location (offset + symbol index * word_size, fragment id)
 * @param offset base offset used to build the recorded locations
 * @param words input buffers, modified in place by the packing step
 */
void encode(
    vec::Buffers<T>* output,
    std::vector<Properties>& props,
    off_t offset,
    vec::Buffers<T>* words) override
{
    // Step 1: pack every input symbol where it lives.
    for (unsigned row = 0; row < this->n_data; ++row) {
        T* const first = words->get(row);
        T* const last = first + this->pkt_size;
        for (T* sym = first; sym != last; ++sym) {
            *sym = ngff4->pack(*sym);
        }
    }

    // Step 2: transform the input, viewed as zero-extended to `n` buffers.
    vec::BuffersZeroExtended<T> padded_words(words, this->n);
    this->fft->fft(output, &padded_words);

    // Step 3: unpack each output symbol, remembering the flagged ones.
    const size_t n_symbols = output->get_size();
    GroupedValues<T> unpacked; // reused for every symbol
    for (unsigned frag = 0; frag < this->code_len; ++frag) {
        T* row = output->get(frag);
        for (size_t pos = 0; pos < n_symbols; ++pos) {
            ngff4->unpack(row[pos], unpacked);
            if (unpacked.flag > 0) {
                const ValueLocation where(
                    offset + pos * this->word_size, frag);
                props[frag].add(where, std::to_string(unpacked.flag));
            }
            row[pos] = unpacked.values;
        }
    }
}

void decode_prepare(
const DecodeContext<T>& context,
const std::vector<Properties>& props,
off_t offset,
vec::Buffers<T>* words) override
{
const vec::Vector<T>& fragments_ids = context.get_fragments_id();
off_t offset_max = offset + this->buf_size;
for (unsigned i = 0; i < this->n_data; ++i) {
const int frag_id = fragments_ids.get(i);
T* chunk = words->get(i);

// the vector will contain marked symbols that will be packed
// firstly. Since locations are stored in unordered map, the vector
// will be sorted later to facilitate packing un-marked symbols
std::vector<size_t> packed_symbs;
// pack marked symbols
for (auto const& data : props[frag_id].get_map()) {
const off_t loc_offset = data.first.get_offset();
if (loc_offset >= offset && loc_offset < offset_max) {
// As loc.offset := offset + j * this->word_size
const size_t j = (loc_offset - offset) / this->word_size;
packed_symbs.push_back(j);
// pack symbol at index `j`
uint32_t flag = std::stoul(data.second);
chunk[j] = ngff4->pack(chunk[j], flag);
}
}
// sort the list of packed symbols
std::sort(packed_symbs.begin(), packed_symbs.end());

// pack un-marked symbols
size_t curr_frag_index = 0;
for (auto const& done_id : packed_symbs) {
// pack symbols from `curr_frag_index` to `j-1`
for (; curr_frag_index < done_id; ++curr_frag_index) {
chunk[curr_frag_index] =
ngff4->pack(chunk[curr_frag_index]);
}
curr_frag_index++;
}
// pack last symbols from `curr_frag_index` to `this->pkt_size-1`
for (; curr_frag_index < this->pkt_size; ++curr_frag_index) {
chunk[curr_frag_index] = ngff4->pack(chunk[curr_frag_index]);
}
}
}

void decode_apply(
const DecodeContext<T>& context,
vec::Buffers<T>* output,
vec::Buffers<T>* words) override
{
// decode_apply: do the same thing as in fec_base
FecCode<T>::decode_apply(context, output, words);
// unpack decoded symbols
for (unsigned i = 0; i < this->n_data; ++i) {
T* chunk = output->get(i);
for (unsigned j = 0; j < this->pkt_size; ++j) {
chunk[j] = ngff4->unpack(chunk[j]).values;
}
}
}
};

} // namespace fec
Expand Down
7 changes: 7 additions & 0 deletions src/gf_nf4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ GroupedValues<__uint128_t> NF4<__uint128_t>::unpack(__uint128_t a) const
return simd::unpack(a, this->n);
}

// Explicit specialization for 128-bit words: the unpacking is delegated to
// simd::unpack, which writes the result into `b` instead of returning a new
// GroupedValues object. `this->n` is forwarded as the third argument.
template <>
void NF4<__uint128_t>::unpack(__uint128_t a, GroupedValues<__uint128_t>& b)
const
{
simd::unpack(a, b, this->n);
}

template <>
__uint128_t NF4<__uint128_t>::pack(__uint128_t a) const
{
Expand Down
32 changes: 32 additions & 0 deletions src/gf_nf4.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class NF4 : public gf::Field<T> {
T pack(T a) const;
T pack(T a, uint32_t flag) const;
GroupedValues<T> unpack(T a) const;
void unpack(T a, GroupedValues<T>& b) const;
T get_nth_root(T n) const override;
void compute_omegas(vec::Vector<T>* W, int n, T w) const override;
const gf::Field<uint32_t>& get_sub_field() const;
Expand Down Expand Up @@ -430,6 +431,33 @@ GroupedValues<T> NF4<T>::unpack(T a) const
return b;
}

/**
 * Unpack `a` into `b` without constructing a new GroupedValues object.
 *
 * `a` is read as `n` consecutive 32-bit slots, lowest slot first. A slot
 * equal to 65536 is stored as 0 and bit `i` of `b.flag` is set for it;
 * any other slot is truncated to 16 bits. The 16-bit results are combined
 * with `expand16` and stored in `b.values`.
 *
 * @param a packed word
 * @param b output: `flag` and `values` are overwritten
 */
template <typename T>
void NF4<T>::unpack(T a, GroupedValues<T>& b) const
{
    // 65536 (2^16) cannot be stored in a uint16_t, so it is signalled
    // through the flag bits instead.
    const uint32_t overflow_marker = 65536;

    uint16_t low_words[this->n];
    uint32_t marks = 0;

    // Slot 0 is read before any shift.
    uint32_t slot = static_cast<uint32_t>(a & MASK32);
    if (slot == overflow_marker) {
        marks |= 1;
        low_words[0] = 0;
    } else {
        low_words[0] = static_cast<uint16_t>(slot);
    }

    for (int idx = 1; idx < this->n; ++idx) {
        // Move to the next 32-bit slot in two 16-bit steps
        // (presumably so the shift count stays below the width of T when
        // T is 32 bits wide -- TODO confirm).
        a = (a >> 16) >> 16;
        slot = static_cast<uint32_t>(a & MASK32);
        if (slot == overflow_marker) {
            marks |= (1 << idx);
            low_words[idx] = 0;
        } else {
            low_words[idx] = static_cast<uint16_t>(slot);
        }
    }

    b.flag = marks;
    b.values = expand16(low_words);
}

// Use for fft
template <typename T>
T NF4<T>::get_nth_root(T n) const
Expand Down Expand Up @@ -535,6 +563,10 @@ __uint128_t NF4<__uint128_t>::pack(__uint128_t a, uint32_t flag) const;
template <>
GroupedValues<__uint128_t> NF4<__uint128_t>::unpack(__uint128_t a) const;

template <>
void NF4<__uint128_t>::unpack(__uint128_t a, GroupedValues<__uint128_t>& b)
const;

template <>
void NF4<__uint128_t>::hadamard_mul(int n, __uint128_t* x, __uint128_t* y)
const;
Expand Down
12 changes: 3 additions & 9 deletions src/gf_ring.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ void RingModN<T>::mul_coef_to_buf(T a, T* src, T* dest, size_t len) const
DoubleSizeVal<T> coef = DoubleSizeVal<T>(a);
for (i = 0; i < len; i++) {
// perform multiplication
dest[i] = T((coef * src[i]) % this->_card);
dest[i] = mul(coef, src[i]);
}
}

Expand All @@ -405,7 +405,7 @@ void RingModN<T>::add_two_bufs(T* src, T* dest, size_t len) const
size_t i;
for (i = 0; i < len; i++) {
// perform addition
dest[i] = (src[i] + dest[i]) % this->_card;
dest[i] = add(src[i], dest[i]);
}
}

Expand All @@ -427,14 +427,8 @@ template <typename T>
void RingModN<T>::sub_two_bufs(T* bufa, T* bufb, T* res, size_t len) const
{
size_t i;
T result;
for (i = 0; i < len; i++) {
if (bufa[i] >= bufb[i]) {
result = bufa[i] - bufb[i];
} else {
result = this->_card - (bufb[i] - bufa[i]);
}
res[i] = result;
res[i] = sub(bufa[i], bufb[i]);
}
}

Expand Down
65 changes: 45 additions & 20 deletions src/simd_nf4.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ static inline aint128 m128i_to_uint128(m128i v)
}
#endif // #ifdef QUADIRON_USE_AVX2

inline aint128 expand16(aint16* arr, int n)
inline aint128 expand16(uint16_t* arr, int n)
{
// since n <= 4
uint16_t _arr[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0};
Expand All @@ -61,7 +61,7 @@ inline aint128 expand16(aint16* arr, int n)
return m128i_to_uint128(b);
}

inline aint128 expand32(aint32* arr, int n)
inline aint128 expand32(uint32_t* arr, int n)
{
// since n <= 4
uint32_t _arr[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0};
Expand All @@ -72,35 +72,60 @@ inline aint128 expand32(aint32* arr, int n)
return m128i_to_uint128(b);
}

inline GroupedValues<__uint128_t> unpack(aint128 a, int n)
inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n)
{
aint32 flag = 0;
uint32_t ai[4] __attribute__((aligned(ALIGN_SIZE)));
uint32_t bi[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0};
uint16_t ai[8];
aint128 values;
int i;

m128i _a = _mm_loadu_si128((m128i*)&a);
ai[0] = _mm_extract_epi32(_a, 0);
ai[1] = _mm_extract_epi32(_a, 1);
ai[2] = _mm_extract_epi32(_a, 2);
ai[3] = _mm_extract_epi32(_a, 3);
for (i = 0; i < n; i++) {
if (ai[i] == 65536)
flag |= (1 << i);
else
bi[i] = (aint16)ai[i];
}
ai[0] = _mm_extract_epi16(_a, 0);
ai[1] = _mm_extract_epi16(_a, 1);
ai[2] = _mm_extract_epi16(_a, 2);
ai[3] = _mm_extract_epi16(_a, 3);
ai[4] = _mm_extract_epi16(_a, 4);
ai[5] = _mm_extract_epi16(_a, 5);
ai[6] = _mm_extract_epi16(_a, 6);
ai[7] = _mm_extract_epi16(_a, 7);

const uint32_t flag =
ai[1] | (!!ai[3] << 1u) | (!!ai[5] << 2u) | (!!ai[7] << 3u);

m128i val = _mm_set_epi64(
_mm_setzero_si64(), _mm_set_pi16(bi[3], bi[2], bi[1], bi[0]));
_mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0]));
_mm_store_si128((m128i*)&values, val);

GroupedValues<__uint128_t> b = {values, flag};

return b;
}

inline aint128 pack(aint128 a)
inline void unpack(__uint128_t a, GroupedValues<__uint128_t>& b, int n)
{
uint16_t ai[8];
aint128 values;

m128i _a = _mm_loadu_si128((m128i*)&a);
ai[0] = _mm_extract_epi16(_a, 0);
ai[1] = _mm_extract_epi16(_a, 1);
ai[2] = _mm_extract_epi16(_a, 2);
ai[3] = _mm_extract_epi16(_a, 3);
ai[4] = _mm_extract_epi16(_a, 4);
ai[5] = _mm_extract_epi16(_a, 5);
ai[6] = _mm_extract_epi16(_a, 6);
ai[7] = _mm_extract_epi16(_a, 7);

const uint32_t flag =
ai[1] | (!!ai[3] << 1u) | (!!ai[5] << 2u) | (!!ai[7] << 3u);

m128i val = _mm_set_epi64(
_mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0]));
_mm_store_si128((m128i*)&values, val);

b.flag = flag;
b.values = values; // NOLINT(clang-analyzer-core.uninitialized.Assign)
}

inline aint128 pack(__uint128_t a)
{
m128i _a = _mm_loadu_si128((m128i*)&a);
m128i b = _mm_set_epi32(
Expand All @@ -112,7 +137,7 @@ inline aint128 pack(aint128 a)
return m128i_to_uint128(b);
}

inline aint128 pack(aint128 a, aint32 flag)
inline aint128 pack(__uint128_t a, uint32_t flag)
{
aint32 b0, b1, b2, b3;
m128i _a = _mm_loadu_si128((m128i*)&a);
Expand Down
4 changes: 2 additions & 2 deletions src/vec_buffers.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,10 +408,10 @@ void Buffers<T>::dump(void)
for (int i = 0; i < n; i++) {
std::cout << "\n\t" << i << ": ";
for (size_t j = 0; j < size - 1; j++) {
std::cout << unsigned((get(i))[j]) << "-";
std::cout << (get(i))[j] << "-";
}
if (size > 0) {
std::cout << unsigned((get(i))[size - 1]);
std::cout << (get(i))[size - 1];
}
}
std::cout << "\n)\n";
Expand Down
Loading

0 comments on commit 73766c8

Please sign in to comment.