From 9fd70229c0044139ba85a738099f76fb30f67dc6 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 03:36:54 +0200 Subject: [PATCH 01/12] GF Ring: use virtualized basic operations Use virtualized basic operations instead of implementing directly for prime fields --- src/gf_ring.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/gf_ring.h b/src/gf_ring.h index 14bf1bdf..ac5e971f 100644 --- a/src/gf_ring.h +++ b/src/gf_ring.h @@ -380,7 +380,7 @@ void RingModN::mul_coef_to_buf(T a, T* src, T* dest, size_t len) const DoubleSizeVal coef = DoubleSizeVal(a); for (i = 0; i < len; i++) { // perform multiplication - dest[i] = T((coef * src[i]) % this->_card); + dest[i] = mul(coef, src[i]); } } @@ -405,7 +405,7 @@ void RingModN::add_two_bufs(T* src, T* dest, size_t len) const size_t i; for (i = 0; i < len; i++) { // perform addition - dest[i] = (src[i] + dest[i]) % this->_card; + dest[i] = add(src[i], dest[i]); } } @@ -427,14 +427,8 @@ template void RingModN::sub_two_bufs(T* bufa, T* bufb, T* res, size_t len) const { size_t i; - T result; for (i = 0; i < len; i++) { - if (bufa[i] >= bufb[i]) { - result = bufa[i] - bufb[i]; - } else { - result = this->_card - (bufb[i] - bufa[i]); - } - res[i] = result; + res[i] = sub(bufa[i], bufb[i]); } } From 7d63a682d758675757343cc4bde877274edd08d2 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 03:38:05 +0200 Subject: [PATCH 02/12] Vec buffer: fix output for dump --- src/vec_buffers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vec_buffers.h b/src/vec_buffers.h index b07ad9f9..60fee018 100644 --- a/src/vec_buffers.h +++ b/src/vec_buffers.h @@ -408,10 +408,10 @@ void Buffers::dump(void) for (int i = 0; i < n; i++) { std::cout << "\n\t" << i << ": "; for (size_t j = 0; j < size - 1; j++) { - std::cout << unsigned((get(i))[j]) << "-"; + std::cout << (get(i))[j] << "-"; } if (size > 0) { - std::cout << unsigned((get(i))[size - 1]); + std::cout << (get(i))[size - 1]; } } std::cout << "\n)\n"; From db0ccfc2a5c3f3fc4c7db7bb234a8651d38e27de Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 03:38:40 +0200 Subject: [PATCH 03/12] FEC NF4: encoding and decoding using packets --- src/fec_rs_nf4.h | 108 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 2 deletions(-) diff --git a/src/fec_rs_nf4.h b/src/fec_rs_nf4.h index d5efe132..05a403e4 100644 --- a/src/fec_rs_nf4.h +++ b/src/fec_rs_nf4.h @@ -47,8 +47,17 @@ namespace fec { template class RsNf4 : public FecCode { public: - RsNf4(unsigned word_size, unsigned n_data, unsigned n_parities) - : FecCode(FecType::NON_SYSTEMATIC, word_size, n_data, n_parities) + RsNf4( + unsigned word_size, + unsigned n_data, + unsigned n_parities, + size_t pkt_size = 8) + : FecCode( + FecType::NON_SYSTEMATIC, + word_size, + n_data, + n_parities, + pkt_size) { this->fec_init(); } @@ -247,6 +256,101 @@ class RsNf4 : public FecCode { output->set(i, ngff4->unpack(output->get(i)).values); } } + + /********** Encoding & Decoding using Buffers **********/ + + void encode( + vec::Buffers* output, + std::vector& props, + off_t offset, + vec::Buffers* words) override + { + for (unsigned i = 0; i < this->n_data; ++i) { + T* chunk = words->get(i); + for (size_t j = 0; j < this->pkt_size; ++j) { + chunk[j] = ngff4->pack(chunk[j]); + } + } + vec::BuffersZeroExtended vwords(words, this->n); + this->fft->fft(output, &vwords); + size_t size = output->get_size(); + for (unsigned frag_id = 0; frag_id < this->code_len; ++frag_id) { + T* chunk = output->get(frag_id); + for (size_t symb_id = 0; symb_id < size; symb_id++) { + GroupedValues true_val = ngff4->unpack(chunk[symb_id]); + if (true_val.flag > 0) { + const ValueLocation loc( + offset + symb_id * this->word_size, frag_id); + props[frag_id].add(loc, std::to_string(true_val.flag)); + } + chunk[symb_id] = true_val.values; + } + } + } + + void decode_prepare( + const DecodeContext& context, + const std::vector& props, + off_t offset, + vec::Buffers* words) override + { + const vec::Vector& fragments_ids = context.get_fragments_id(); + off_t offset_max = offset + this->buf_size; + for (unsigned i = 0; i < this->n_data; ++i) { + const int frag_id = fragments_ids.get(i); + T* chunk = words->get(i); + + // the vector will contain marked symbols that will be packed + // firstly. Since locations are stored in unordered map, the vector + // will be sorted later to facilitate packing un-marked symbols + std::vector packed_symbs; + // pack marked symbols + for (auto const& data : props[frag_id].get_map()) { + off_t loc_offset = data.first.get_offset(); + if (loc_offset >= offset && loc_offset < offset_max) { + // As loc.offset := offset + j * this->word_size + const size_t j = (loc_offset - offset) / this->word_size; + packed_symbs.push_back(j); + // pack symbol at index `j` + uint32_t flag = std::stoul(data.second); + chunk[j] = ngff4->pack(chunk[j], flag); + } + } + // sort the list of packed symbols + std::sort(packed_symbs.begin(), packed_symbs.end()); + + // pack un-marked symbols + size_t curr_frag_index = 0; + for (auto const& done_id : packed_symbs) { + // pack symbols from `curr_frag_index` to `j-1` + for (; curr_frag_index < done_id; ++curr_frag_index) { + chunk[curr_frag_index] = + ngff4->pack(chunk[curr_frag_index]); + } + curr_frag_index++; + } + // pack last symbols from `curr_frag_index` to `this->pkt_size-1` + for (; curr_frag_index < this->pkt_size; ++curr_frag_index) { + chunk[curr_frag_index] = ngff4->pack(chunk[curr_frag_index]); + } + } + } + + void decode_apply( + const DecodeContext& context, + vec::Buffers* output, + vec::Buffers* words) override + { + // decode_apply: do the same thing as in fec_base + FecCode::decode_apply(context, output, words); + // unpack decoded symbols + for (unsigned i = 0; i < this->n_data; ++i) { + T* chunk = output->get(i); + for (unsigned j = 0; j < this->pkt_size; ++j) { + chunk[j] = ngff4->unpack(chunk[j]).values; + } + } + } }; } // namespace fec From 16b1ad02b941615947ab9163e4a806921409834b Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 14:47:15 +0200 Subject: [PATCH 04/12] FEC NF4: create FFT supporting packets --- src/fec_rs_nf4.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fec_rs_nf4.h b/src/fec_rs_nf4.h index 05a403e4..dde14a49 100644 --- a/src/fec_rs_nf4.h +++ b/src/fec_rs_nf4.h @@ -89,14 +89,14 @@ class RsNf4 : public FecCode { int m = arith::get_smallest_power_of_2(this->n_data); this->fft = std::unique_ptr>( - new fft::Radix2(*ngff4, this->n, m)); + new fft::Radix2(*ngff4, this->n, m, this->pkt_size)); this->fft_full = std::unique_ptr>( - new fft::Radix2(*ngff4, this->n)); + new fft::Radix2(*ngff4, this->n, this->n, this->pkt_size)); unsigned len_2k = this->gf->get_code_len_high_compo(2 * this->n_data); this->fft_2k = std::unique_ptr>( - new fft::Radix2(*ngff4, len_2k, len_2k)); + new fft::Radix2(*ngff4, len_2k, len_2k, this->pkt_size)); } inline void init_others() override From 3a762af63adc59d7387500c9f3f04b433fb77e09 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 03:39:10 +0200 Subject: [PATCH 05/12] EC driver: RS-NF4 using packets --- test/ec_driver.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/ec_driver.cpp b/test/ec_driver.cpp index e889b288..dbb959ca 100644 --- a/test/ec_driver.cpp +++ b/test/ec_driver.cpp @@ -360,7 +360,8 @@ template void run_fec_rs_nf4(int word_size, int n_data, int n_parities, int rflag) { quadiron::fec::RsNf4* fec; - fec = new quadiron::fec::RsNf4(word_size, n_data, n_parities); + size_t pkt_size = 1024; + fec = new quadiron::fec::RsNf4(word_size, n_data, n_parities, pkt_size); if (tflag) { print_fec_type(fec); From e6c46f8f61aebd9e251ec68afa5883391aa77267 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 03:40:01 +0200 Subject: [PATCH 06/12] SIMD NF4: remove align attribute It removes align attributes that are not necessary. --- src/simd_nf4.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/simd_nf4.h b/src/simd_nf4.h index 1d4e1f4d..0ecc67a8 100644 --- a/src/simd_nf4.h +++ b/src/simd_nf4.h @@ -49,7 +49,7 @@ static inline aint128 m128i_to_uint128(m128i v) } #endif // #ifdef QUADIRON_USE_AVX2 -inline aint128 expand16(aint16* arr, int n) +inline aint128 expand16(uint16_t* arr, int n) { // since n <= 4 uint16_t _arr[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0}; @@ -61,7 +61,7 @@ inline aint128 expand16(aint16* arr, int n) return m128i_to_uint128(b); } -inline aint128 expand32(aint32* arr, int n) +inline aint128 expand32(uint32_t* arr, int n) { // since n <= 4 uint32_t _arr[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0}; @@ -72,7 +72,7 @@ inline aint128 expand32(aint32* arr, int n) return m128i_to_uint128(b); } -inline GroupedValues<__uint128_t> unpack(aint128 a, int n) +inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) { aint32 flag = 0; uint32_t ai[4] __attribute__((aligned(ALIGN_SIZE))); @@ -100,7 +100,7 @@ inline GroupedValues<__uint128_t> unpack(aint128 a, int n) return b; } -inline aint128 pack(aint128 a) +inline aint128 pack(__uint128_t a) { m128i _a = _mm_loadu_si128((m128i*)&a); m128i b = _mm_set_epi32( @@ -112,7 +112,7 @@ inline aint128 pack(aint128 a) return m128i_to_uint128(b); } -inline aint128 pack(aint128 a, aint32 flag) +inline aint128 pack(__uint128_t a, uint32_t flag) { aint32 b0, b1, b2, b3; m128i _a = _mm_loadu_si128((m128i*)&a); From 00303763b4329fc99f367bd235b4de04bd8280ff Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 03:40:26 +0200 Subject: [PATCH 07/12] Benchmark: allow RS-NF4 using packets --- benchmark/benchmark.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index bb73f2ae..de77fbe0 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -171,7 +171,7 @@ int Benchmark::init() fec = new quadiron::fec::RsGfpFft(word_size, k, m); break; case EC_TYPE_RS_NF4: - fec = new quadiron::fec::RsNf4(word_size, k, m); + fec = new quadiron::fec::RsNf4(word_size, k, m, pkt_size); break; case EC_TYPE_RS_FNT: fec = new quadiron::fec::RsFnt(word_size, k, m, pkt_size); @@ -787,7 +787,8 @@ int main(int argc, char** argv) } // Currently support operating on packet:RS_FNT - if (params->fec_type != EC_TYPE_RS_FNT) { + if (params->fec_type != EC_TYPE_RS_FNT + && params->fec_type != EC_TYPE_RS_NF4) { params->operation_on_packet = false; } From 2744b1c37c95633f6d9f15280ceae570f19444c5 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 15:12:26 +0200 Subject: [PATCH 08/12] SIMD NF4: enhance unpack --- src/simd_nf4.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/simd_nf4.h b/src/simd_nf4.h index 0ecc67a8..56f00c59 100644 --- a/src/simd_nf4.h +++ b/src/simd_nf4.h @@ -74,25 +74,27 @@ inline aint128 expand32(uint32_t* arr, int n) inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) { - aint32 flag = 0; - uint32_t ai[4] __attribute__((aligned(ALIGN_SIZE))); - uint32_t bi[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0}; + uint32_t flag; + uint16_t ai[8]; aint128 values; - int i; m128i _a = _mm_loadu_si128((m128i*)&a); - ai[0] = _mm_extract_epi32(_a, 0); - ai[1] = _mm_extract_epi32(_a, 1); - ai[2] = _mm_extract_epi32(_a, 2); - ai[3] = _mm_extract_epi32(_a, 3); - for (i = 0; i < n; i++) { - if (ai[i] == 65536) - flag |= (1 << i); - else - bi[i] = (aint16)ai[i]; - } + ai[0] = _mm_extract_epi16(_a, 0); + ai[1] = _mm_extract_epi16(_a, 1); + ai[2] = _mm_extract_epi16(_a, 2); + ai[3] = _mm_extract_epi16(_a, 3); + ai[4] = _mm_extract_epi16(_a, 4); + ai[5] = _mm_extract_epi16(_a, 5); + ai[6] = _mm_extract_epi16(_a, 6); + ai[7] = _mm_extract_epi16(_a, 7); + + flag = ai[1]; + flag += (ai[3] > 0) ? 2 : 0; + flag += (ai[5] > 0) ? 4 : 0; + flag += (ai[7] > 0) ? 8 : 0; + m128i val = _mm_set_epi64( - _mm_setzero_si64(), _mm_set_pi16(bi[3], bi[2], bi[1], bi[0])); + _mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0])); _mm_store_si128((m128i*)&values, val); GroupedValues<__uint128_t> b = {values, flag}; From 9573dfa4c6862e6702aa5470d74cf0d51a067fcb Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 15:52:53 +0200 Subject: [PATCH 09/12] GF NF4: add other unpack function working on given GroupedValues --- src/gf_nf4.cpp | 7 +++++++ src/gf_nf4.h | 32 ++++++++++++++++++++++++++++++++ src/simd_nf4.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/src/gf_nf4.cpp b/src/gf_nf4.cpp index 8b1e8a7b..edde93bd 100644 --- a/src/gf_nf4.cpp +++ b/src/gf_nf4.cpp @@ -94,6 +94,13 @@ GroupedValues<__uint128_t> NF4<__uint128_t>::unpack(__uint128_t a) const return simd::unpack(a, this->n); } +template <> +void NF4<__uint128_t>::unpack(__uint128_t a, GroupedValues<__uint128_t>& b) + const +{ + simd::unpack(a, b, this->n); +} + template <> __uint128_t NF4<__uint128_t>::pack(__uint128_t a) const { diff --git a/src/gf_nf4.h b/src/gf_nf4.h index de537312..7ac70a3d 100644 --- a/src/gf_nf4.h +++ b/src/gf_nf4.h @@ -77,6 +77,7 @@ class NF4 : public gf::Field { T pack(T a) const; T pack(T a, uint32_t flag) const; GroupedValues unpack(T a) const; + void unpack(T a, GroupedValues& b) const; T get_nth_root(T n) const override; void compute_omegas(vec::Vector* W, int n, T w) const override; const gf::Field& get_sub_field() const; @@ -430,6 +431,33 @@ GroupedValues NF4::unpack(T a) const return b; } +template +void NF4::unpack(T a, GroupedValues& b) const +{ + uint32_t flag = 0; + uint32_t ae; + uint16_t arr[this->n]; + + ae = (uint32_t)(a & MASK32); + if (ae == 65536) { + flag |= 1; + arr[0] = 0; + } else + arr[0] = (uint16_t)ae; + for (int i = 1; i < this->n; i++) { + a = (a >> 16) >> 16; + ae = (uint32_t)(a & MASK32); + if (ae == 65536) { + flag |= (1 << i); + arr[i] = 0; + } else + arr[i] = ae; + } + + b.flag = flag; + b.values = expand16(arr); +} + // Use for fft template T NF4::get_nth_root(T n) const @@ -535,6 +563,10 @@ __uint128_t NF4<__uint128_t>::pack(__uint128_t a, uint32_t flag) const; template <> GroupedValues<__uint128_t> NF4<__uint128_t>::unpack(__uint128_t a) const; +template <> +void NF4<__uint128_t>::unpack(__uint128_t a, GroupedValues<__uint128_t>& b) + const; + template <> void NF4<__uint128_t>::hadamard_mul(int n, __uint128_t* x, __uint128_t* y) const; diff --git a/src/simd_nf4.h b/src/simd_nf4.h index 56f00c59..5906f441 100644 --- a/src/simd_nf4.h +++ b/src/simd_nf4.h @@ -102,6 +102,35 @@ inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) return b; } +inline void unpack(__uint128_t a, GroupedValues<__uint128_t>& b, int n) +{ + uint32_t flag; + uint16_t ai[8]; + aint128 values; + + m128i _a = _mm_loadu_si128((m128i*)&a); + ai[0] = _mm_extract_epi16(_a, 0); + ai[1] = _mm_extract_epi16(_a, 1); + ai[2] = _mm_extract_epi16(_a, 2); + ai[3] = _mm_extract_epi16(_a, 3); + ai[4] = _mm_extract_epi16(_a, 4); + ai[5] = _mm_extract_epi16(_a, 5); + ai[6] = _mm_extract_epi16(_a, 6); + ai[7] = _mm_extract_epi16(_a, 7); + + flag = ai[1]; + flag += (ai[3] > 0) ? 2 : 0; + flag += (ai[5] > 0) ? 4 : 0; + flag += (ai[7] > 0) ? 8 : 0; + + m128i val = _mm_set_epi64( + _mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0])); + _mm_store_si128((m128i*)&values, val); + + b.flag = flag; + b.values = values; +} + inline aint128 pack(__uint128_t a) { m128i _a = _mm_loadu_si128((m128i*)&a); From fe2c3001487c4eb71543d863cf919efb4140a868 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Wed, 13 Jun 2018 15:53:17 +0200 Subject: [PATCH 10/12] FEC NF4: use new unpack --- src/fec_rs_nf4.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fec_rs_nf4.h b/src/fec_rs_nf4.h index dde14a49..ba7af5e1 100644 --- a/src/fec_rs_nf4.h +++ b/src/fec_rs_nf4.h @@ -143,9 +143,10 @@ class RsNf4 : public FecCode { vec::ZeroExtended vwords(words, this->n); this->fft->fft(output, &vwords); // std::cout << "encoded:"; output->dump(); + GroupedValues true_val; for (unsigned i = 0; i < this->code_len; i++) { T val = output->get(i); - GroupedValues true_val = ngff4->unpack(val); + ngff4->unpack(val, true_val); if (true_val.flag > 0) { props[i].add( ValueLocation(offset, i), std::to_string(true_val.flag)); @@ -274,10 +275,11 @@ class RsNf4 : public FecCode { vec::BuffersZeroExtended vwords(words, this->n); this->fft->fft(output, &vwords); size_t size = output->get_size(); + GroupedValues true_val; for (unsigned frag_id = 0; frag_id < this->code_len; ++frag_id) { T* chunk = output->get(frag_id); for (size_t symb_id = 0; symb_id < size; symb_id++) { - GroupedValues true_val = ngff4->unpack(chunk[symb_id]); + ngff4->unpack(chunk[symb_id], true_val); if (true_val.flag > 0) { const ValueLocation loc( offset + symb_id * this->word_size, frag_id); From d257a9cf7f37b23bef33b02034eb2cd285d7d383 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Thu, 14 Jun 2018 11:08:42 +0200 Subject: [PATCH 11/12] SIMD NF4: add nolint --- src/simd_nf4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simd_nf4.h b/src/simd_nf4.h index 5906f441..61973364 100644 --- a/src/simd_nf4.h +++ b/src/simd_nf4.h @@ -128,7 +128,7 @@ inline void unpack(__uint128_t a, GroupedValues<__uint128_t>& b, int n) _mm_store_si128((m128i*)&values, val); b.flag = flag; - b.values = values; + b.values = values; // NOLINT(clang-analyzer-core.uninitialized.Assign) } inline aint128 pack(__uint128_t a) From d78fe68db9fb25fcb57cedd5754bf5718e9fe6a8 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Tue, 7 Aug 2018 12:19:32 +0200 Subject: [PATCH 12/12] Address comments --- src/fec_rs_nf4.h | 2 +- src/simd_nf4.h | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/fec_rs_nf4.h b/src/fec_rs_nf4.h index ba7af5e1..e072ec6b 100644 --- a/src/fec_rs_nf4.h +++ b/src/fec_rs_nf4.h @@ -308,7 +308,7 @@ class RsNf4 : public FecCode { std::vector packed_symbs; // pack marked symbols for (auto const& data : props[frag_id].get_map()) { - off_t loc_offset = data.first.get_offset(); + const off_t loc_offset = data.first.get_offset(); if (loc_offset >= offset && loc_offset < offset_max) { // As loc.offset := offset + j * this->word_size const size_t j = (loc_offset - offset) / this->word_size; diff --git a/src/simd_nf4.h b/src/simd_nf4.h index 61973364..d0052fee 100644 --- a/src/simd_nf4.h +++ b/src/simd_nf4.h @@ -74,7 +74,6 @@ inline aint128 expand32(uint32_t* arr, int n) inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) { - uint32_t flag; uint16_t ai[8]; aint128 values; @@ -88,10 +87,8 @@ inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) ai[6] = _mm_extract_epi16(_a, 6); ai[7] = _mm_extract_epi16(_a, 7); - flag = ai[1]; - flag += (ai[3] > 0) ? 2 : 0; - flag += (ai[5] > 0) ? 4 : 0; - flag += (ai[7] > 0) ? 8 : 0; + const uint32_t flag = + ai[1] | (!!ai[3] << 1u) | (!!ai[5] << 2u) | (!!ai[7] << 3u); m128i val = _mm_set_epi64( _mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0])); @@ -104,7 +101,6 @@ inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) inline void unpack(__uint128_t a, GroupedValues<__uint128_t>& b, int n) { - uint32_t flag; uint16_t ai[8]; aint128 values; @@ -118,10 +114,8 @@ inline void unpack(__uint128_t a, GroupedValues<__uint128_t>& b, int n) ai[6] = _mm_extract_epi16(_a, 6); ai[7] = _mm_extract_epi16(_a, 7); - flag = ai[1]; - flag += (ai[3] > 0) ? 2 : 0; - flag += (ai[5] > 0) ? 4 : 0; - flag += (ai[7] > 0) ? 8 : 0; + const uint32_t flag = + ai[1] | (!!ai[3] << 1u) | (!!ai[5] << 2u) | (!!ai[7] << 3u); m128i val = _mm_set_epi64( _mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0]));