-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RS NF4: support encoding/decoding using packets #215
Changes from all commits
9fd7022
7d63a68
db0ccfc
16b1ad0
3a762af
e6c46f8
0030376
2744b1c
9573dfa
fe2c300
d257a9c
d78fe68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -380,7 +380,7 @@ void RingModN<T>::mul_coef_to_buf(T a, T* src, T* dest, size_t len) const | |
DoubleSizeVal<T> coef = DoubleSizeVal<T>(a); | ||
for (i = 0; i < len; i++) { | ||
// perform multiplication | ||
dest[i] = T((coef * src[i]) % this->_card); | ||
dest[i] = mul(coef, src[i]); | ||
} | ||
} | ||
|
||
|
@@ -405,7 +405,7 @@ void RingModN<T>::add_two_bufs(T* src, T* dest, size_t len) const | |
size_t i; | ||
for (i = 0; i < len; i++) { | ||
// perform addition | ||
dest[i] = (src[i] + dest[i]) % this->_card; | ||
dest[i] = add(src[i], dest[i]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of having a method |
||
} | ||
} | ||
|
||
|
@@ -427,14 +427,8 @@ template <typename T> | |
void RingModN<T>::sub_two_bufs(T* bufa, T* bufb, T* res, size_t len) const | ||
{ | ||
size_t i; | ||
T result; | ||
for (i = 0; i < len; i++) { | ||
if (bufa[i] >= bufb[i]) { | ||
result = bufa[i] - bufb[i]; | ||
} else { | ||
result = this->_card - (bufb[i] - bufa[i]); | ||
} | ||
res[i] = result; | ||
res[i] = sub(bufa[i], bufb[i]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of having a method If these operations are the one from
by
(replace |
||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,7 +49,7 @@ static inline aint128 m128i_to_uint128(m128i v) | |
} | ||
#endif // #ifdef QUADIRON_USE_AVX2 | ||
|
||
inline aint128 expand16(aint16* arr, int n) | ||
inline aint128 expand16(uint16_t* arr, int n) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why this change? It should be explained in the commit message. |
||
{ | ||
// since n <= 4 | ||
uint16_t _arr[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0}; | ||
|
@@ -61,7 +61,7 @@ inline aint128 expand16(aint16* arr, int n) | |
return m128i_to_uint128(b); | ||
} | ||
|
||
inline aint128 expand32(aint32* arr, int n) | ||
inline aint128 expand32(uint32_t* arr, int n) | ||
{ | ||
// since n <= 4 | ||
uint32_t _arr[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0}; | ||
|
@@ -72,35 +72,60 @@ inline aint128 expand32(aint32* arr, int n) | |
return m128i_to_uint128(b); | ||
} | ||
|
||
inline GroupedValues<__uint128_t> unpack(aint128 a, int n) | ||
inline GroupedValues<__uint128_t> unpack(__uint128_t a, int n) | ||
{ | ||
aint32 flag = 0; | ||
uint32_t ai[4] __attribute__((aligned(ALIGN_SIZE))); | ||
uint32_t bi[4] __attribute__((aligned(ALIGN_SIZE))) = {0, 0, 0, 0}; | ||
uint16_t ai[8]; | ||
aint128 values; | ||
int i; | ||
|
||
m128i _a = _mm_loadu_si128((m128i*)&a); | ||
ai[0] = _mm_extract_epi32(_a, 0); | ||
ai[1] = _mm_extract_epi32(_a, 1); | ||
ai[2] = _mm_extract_epi32(_a, 2); | ||
ai[3] = _mm_extract_epi32(_a, 3); | ||
for (i = 0; i < n; i++) { | ||
if (ai[i] == 65536) | ||
flag |= (1 << i); | ||
else | ||
bi[i] = (aint16)ai[i]; | ||
} | ||
ai[0] = _mm_extract_epi16(_a, 0); | ||
ai[1] = _mm_extract_epi16(_a, 1); | ||
ai[2] = _mm_extract_epi16(_a, 2); | ||
ai[3] = _mm_extract_epi16(_a, 3); | ||
ai[4] = _mm_extract_epi16(_a, 4); | ||
ai[5] = _mm_extract_epi16(_a, 5); | ||
ai[6] = _mm_extract_epi16(_a, 6); | ||
ai[7] = _mm_extract_epi16(_a, 7); | ||
|
||
const uint32_t flag = | ||
ai[1] | (!!ai[3] << 1u) | (!!ai[5] << 2u) | (!!ai[7] << 3u); | ||
|
||
m128i val = _mm_set_epi64( | ||
_mm_setzero_si64(), _mm_set_pi16(bi[3], bi[2], bi[1], bi[0])); | ||
_mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0])); | ||
_mm_store_si128((m128i*)&values, val); | ||
|
||
GroupedValues<__uint128_t> b = {values, flag}; | ||
|
||
return b; | ||
} | ||
|
||
inline aint128 pack(aint128 a) | ||
inline void unpack(__uint128_t a, GroupedValues<__uint128_t>& b, int n) | ||
{ | ||
uint16_t ai[8]; | ||
aint128 values; | ||
|
||
m128i _a = _mm_loadu_si128((m128i*)&a); | ||
ai[0] = _mm_extract_epi16(_a, 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
for (int i = 0; i < sizeof(ai)/sizeof(ai[0]); ++i) {
ai[i] = _mm_extract_epi16(_a, i);
}
The loop count being constant, the compiler will unroll the loop anyway, but the code will be more readable.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `_mm_extract_epi16` does not accept a variable as its second argument (the lane index must be a compile-time constant) :( |
||
ai[1] = _mm_extract_epi16(_a, 1); | ||
ai[2] = _mm_extract_epi16(_a, 2); | ||
ai[3] = _mm_extract_epi16(_a, 3); | ||
ai[4] = _mm_extract_epi16(_a, 4); | ||
ai[5] = _mm_extract_epi16(_a, 5); | ||
ai[6] = _mm_extract_epi16(_a, 6); | ||
ai[7] = _mm_extract_epi16(_a, 7); | ||
|
||
const uint32_t flag = | ||
ai[1] | (!!ai[3] << 1u) | (!!ai[5] << 2u) | (!!ai[7] << 3u); | ||
|
||
m128i val = _mm_set_epi64( | ||
_mm_setzero_si64(), _mm_set_pi16(ai[6], ai[4], ai[2], ai[0])); | ||
_mm_store_si128((m128i*)&values, val); | ||
|
||
b.flag = flag; | ||
b.values = values; // NOLINT(clang-analyzer-core.uninitialized.Assign) | ||
} | ||
|
||
inline aint128 pack(__uint128_t a) | ||
{ | ||
m128i _a = _mm_loadu_si128((m128i*)&a); | ||
m128i b = _mm_set_epi32( | ||
|
@@ -112,7 +137,7 @@ inline aint128 pack(aint128 a) | |
return m128i_to_uint128(b); | ||
} | ||
|
||
inline aint128 pack(aint128 a, aint32 flag) | ||
inline aint128 pack(__uint128_t a, uint32_t flag) | ||
{ | ||
aint32 b0, b1, b2, b3; | ||
m128i _a = _mm_loadu_si128((m128i*)&a); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of having a method `mul`, could we overload the `*` operator? That would make the code more readable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This kind of change should go in a separate PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, you don't need to handle it here, I'll do it in #208