Skip to content

Commit

Permalink
clear debug
Browse files Browse the repository at this point in the history
  • Loading branch information
chonpsk committed Sep 9, 2024
2 parents 2ffea5a + 843ff57 commit df5445b
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 1,045 deletions.
1 change: 0 additions & 1 deletion arith/src/extension_field/gf2_128x8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ cfg_if::cfg_if! {
pub type GF2_128x8 = avx::AVX512GF2_128x8;
}
}

72 changes: 32 additions & 40 deletions arith/src/extension_field/gf2_128x8/avx256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ field_common!(AVX256GF2_128x8);
impl AVX256GF2_128x8 {
#[inline(always)]
pub(crate) fn pack_full(data: __m128i) -> [__m256i; 4] {
[unsafe { _mm256_broadcast_i32x4(data) },
unsafe { _mm256_broadcast_i32x4(data) },
unsafe { _mm256_broadcast_i32x4(data) },
unsafe { _mm256_broadcast_i32x4(data) },
[
unsafe { _mm256_broadcast_i32x4(data) },
unsafe { _mm256_broadcast_i32x4(data) },
unsafe { _mm256_broadcast_i32x4(data) },
unsafe { _mm256_broadcast_i32x4(data) },
]
}

Expand Down Expand Up @@ -86,14 +87,7 @@ const PACKED_0: [__m256i; 4] = [
unsafe { transmute::<[i32; 8], std::arch::x86_64::__m256i>([0; 8]) },
unsafe { transmute::<[i32; 8], std::arch::x86_64::__m256i>([0; 8]) },
];
const _M256_INV_2: __m256i = unsafe {
transmute([
67_u64,
(1_u64) << 63,
67_u64,
(1_u64) << 63,
])
};
const _M256_INV_2: __m256i = unsafe { transmute([67_u64, (1_u64) << 63, 67_u64, (1_u64) << 63]) };
const PACKED_INV_2: [__m256i; 4] = [_M256_INV_2, _M256_INV_2, _M256_INV_2, _M256_INV_2]; // Should not be used?

// p(x) = x^128 + x^7 + x^2 + x + 1
Expand Down Expand Up @@ -124,16 +118,20 @@ impl Field for AVX256GF2_128x8 {
fn zero() -> Self {
unsafe {
let zero = _mm256_setzero_si256();
Self { data: [zero, zero, zero, zero] }
Self {
data: [zero, zero, zero, zero],
}
}
}

#[inline(always)]
fn is_zero(&self) -> bool {
unsafe {
let zero = _mm256_setzero_si256();
let cmp_0 = _mm256_cmpeq_epi64_mask(self.data[0], zero) & _mm256_cmpeq_epi64_mask(self.data[1], zero);
let cmp_1 = _mm256_cmpeq_epi64_mask(self.data[2], zero) & _mm256_cmpeq_epi64_mask(self.data[3], zero);
let cmp_0 = _mm256_cmpeq_epi64_mask(self.data[0], zero)
& _mm256_cmpeq_epi64_mask(self.data[1], zero);
let cmp_1 = _mm256_cmpeq_epi64_mask(self.data[2], zero)
& _mm256_cmpeq_epi64_mask(self.data[3], zero);
(cmp_0 & cmp_1) == 0xF // All 16 64-bit integers are equal (zero)
}
}
Expand All @@ -142,7 +140,9 @@ impl Field for AVX256GF2_128x8 {
fn one() -> Self {
unsafe {
let one = _mm256_set_epi64x(0, 1, 0, 1);
Self { data: [one, one, one, one] }
Self {
data: [one, one, one, one],
}
}
}

Expand Down Expand Up @@ -404,7 +404,9 @@ impl From<u32> for AVX256GF2_128x8 {
fn from(v: u32) -> AVX256GF2_128x8 {
assert!(v < 2); // only 0 and 1 are allowed
let data = unsafe { _mm256_set_epi64x(0, v as i64, 0, v as i64) };
AVX256GF2_128x8 { data: [data, data, data, data] }
AVX256GF2_128x8 {
data: [data, data, data, data],
}
}
}

Expand All @@ -422,9 +424,9 @@ impl Debug for AVX256GF2_128x8 {
let mut data = [0u8; 128];
unsafe {
_mm256_storeu_si256(data.as_mut_ptr() as *mut __m256i, self.data[0]);
_mm256_storeu_si256((data.as_mut_ptr() as *mut __m256i).offset(8), self.data[1]);
_mm256_storeu_si256((data.as_mut_ptr() as *mut __m256i).offset(16), self.data[2]);
_mm256_storeu_si256((data.as_mut_ptr() as *mut __m256i).offset(24), self.data[3]);
_mm256_storeu_si256((data.as_mut_ptr() as *mut __m256i).offset(1), self.data[1]);
_mm256_storeu_si256((data.as_mut_ptr() as *mut __m256i).offset(2), self.data[2]);
_mm256_storeu_si256((data.as_mut_ptr() as *mut __m256i).offset(3), self.data[3]);
}
f.debug_struct("AVX256GF2_128x8")
.field("data", &data)
Expand All @@ -436,8 +438,10 @@ impl PartialEq for AVX256GF2_128x8 {
#[inline(always)]
fn eq(&self, other: &Self) -> bool {
unsafe {
let cmp_0 = _mm256_cmpeq_epi64_mask(self.data[0], other.data[0]) & _mm256_cmpeq_epi64_mask(self.data[1], other.data[1]);
let cmp_1 = _mm256_cmpeq_epi64_mask(self.data[2], other.data[2]) & _mm256_cmpeq_epi64_mask(self.data[3], other.data[3]);
let cmp_0 = _mm256_cmpeq_epi64_mask(self.data[0], other.data[0])
& _mm256_cmpeq_epi64_mask(self.data[1], other.data[1]);
let cmp_1 = _mm256_cmpeq_epi64_mask(self.data[2], other.data[2])
& _mm256_cmpeq_epi64_mask(self.data[3], other.data[3]);
(cmp_0 & cmp_1) == 0xF // All 16 64-bit integers are equal
}
}
Expand Down Expand Up @@ -504,16 +508,8 @@ fn sub_internal(a: &AVX256GF2_128x8, b: &AVX256GF2_128x8) -> AVX256GF2_128x8 {
#[inline]
fn _m256_mul_internal(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let xmmmask = _mm256_set_epi32(
0,
0,
0,
0xffffffffu32 as i32,
0,
0,
0,
0xffffffffu32 as i32,
);
let xmmmask =
_mm256_set_epi32(0, 0, 0, 0xffffffffu32 as i32, 0, 0, 0, 0xffffffffu32 as i32);

let mut tmp3 = _mm256_clmulepi64_epi128(a, b, 0x00);
let mut tmp6 = _mm256_clmulepi64_epi128(a, b, 0x11);
Expand Down Expand Up @@ -632,14 +628,10 @@ impl ExtensionField for AVX256GF2_128x8 {
let v7 = (base.v & 1u8) as i64;

let mut res = *self;
res.data[0] =
unsafe { _mm256_xor_si256(res.data[0], _mm256_set_epi64x(0, v0, 0, v2)) };
res.data[1] =
unsafe { _mm256_xor_si256(res.data[1], _mm256_set_epi64x(0, v4, 0, v6)) };
res.data[2] =
unsafe { _mm256_xor_si256(res.data[2], _mm256_set_epi64x(0, v1, 0, v3,)) };
res.data[3] =
unsafe { _mm256_xor_si256(res.data[3], _mm256_set_epi64x(0, v5, 0, v7)) };
res.data[0] = unsafe { _mm256_xor_si256(res.data[0], _mm256_set_epi64x(0, v0, 0, v2)) };
res.data[1] = unsafe { _mm256_xor_si256(res.data[1], _mm256_set_epi64x(0, v4, 0, v6)) };
res.data[2] = unsafe { _mm256_xor_si256(res.data[2], _mm256_set_epi64x(0, v1, 0, v3)) };
res.data[3] = unsafe { _mm256_xor_si256(res.data[3], _mm256_set_epi64x(0, v5, 0, v7)) };

res
}
Expand Down
Loading

0 comments on commit df5445b

Please sign in to comment.