Skip to content

Commit

Permalink
Merge pull request #44 from AndersTrier/AndersTrier/utils
Browse files Browse the repository at this point in the history
Fewer methods in `trait Engine`. Introduce `utils.rs`
  • Loading branch information
AndersTrier authored Aug 19, 2024
2 parents 50a2a99 + 52a8f57 commit 4c303f5
Show file tree
Hide file tree
Showing 12 changed files with 170 additions and 206 deletions.
31 changes: 2 additions & 29 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,46 +338,19 @@ fn benchmarks_engine_one<E: Engine>(c: &mut Criterion, name: &str, engine: E) {
b.iter(|| E::eval_poly(black_box(&mut data), GF_ORDER / 8))
});

// XOR MUL
// MUL

let mut x = &mut generate_shards_64(1, shard_len_64, 0)[0];
let y = &generate_shards_64(1, shard_len_64, 1)[0];

group.bench_function("xor", |b| {
b.iter(|| E::xor(black_box(&mut x), black_box(&y)))
});

group.bench_function("mul", |b| {
b.iter(|| engine.mul(black_box(&mut x), black_box(12345)))
});

// XOR_WITHIN

let shards_256_data = &mut generate_shards_64(1, 256 * shard_len_64, 0)[0];
let mut shards_256 = ShardsRefMut::new(256, shard_len_64, shards_256_data.as_mut());

group.bench_function("xor_within 128*2", |b| {
b.iter(|| {
E::xor_within(
black_box(&mut shards_256),
black_box(0),
black_box(128),
black_box(128),
)
})
});

// FORMAL DERIVATIVE
// FFT IFFT

let shards_128_data = &mut generate_shards_64(1, 128 * shard_len_64, 0)[0];
let mut shards_128 = ShardsRefMut::new(128, shard_len_64, shards_128_data.as_mut());

group.bench_function("formal_derivative 128", |b| {
b.iter(|| E::formal_derivative(black_box(&mut shards_128)))
});

// FFT IFFT

group.bench_function("FFT 128", |b| {
b.iter(|| {
engine.fft(
Expand Down
113 changes: 8 additions & 105 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@
//! [`ReedSolomonDecoder`]: crate::ReedSolomonDecoder
//! [`rate`]: crate::rate
use std::iter::zip;

pub(crate) use self::shards::Shards;
pub(crate) use utils::{fft_skew_end, formal_derivative, ifft_skew_end, xor_within};

pub use self::{
engine_default::DefaultEngine, engine_naive::Naive, engine_nosimd::NoSimd, shards::ShardsRefMut,
Expand All @@ -60,6 +59,7 @@ mod engine_neon;

mod fwht;
mod shards;
mod utils;

pub mod tables;

Expand Down Expand Up @@ -90,50 +90,15 @@ pub const CANTOR_BASIS: [GfElement; GF_BITS] = [
/// Galois field element.
pub type GfElement = u16;

// ======================================================================
// FUNCTIONS - PUBLIC - Galois field operations

/// Adds two values with end-around carry.
///
/// Both operands are widened to `u32` and summed. The bits of the sum
/// above position `GF_BITS` are then added back onto the sum, and the
/// result is truncated to [`GfElement`].
///
/// NOTE(review): this presumably amounts to addition modulo
/// `2^GF_BITS - 1` -- confirm against the definition of `GF_BITS`.
#[inline(always)]
pub fn add_mod(x: GfElement, y: GfElement) -> GfElement {
    let wide = u32::from(x) + u32::from(y);
    let carry = wide >> GF_BITS;
    (wide + carry) as GfElement
}

/// Subtracts `y` from `x` with end-around borrow.
///
/// Both operands are widened to `u32` and subtracted with wrapping
/// arithmetic. The bits of the difference above position `GF_BITS`
/// are then added back (again wrapping), and the result is truncated
/// to [`GfElement`].
///
/// NOTE(review): this presumably amounts to subtraction modulo
/// `2^GF_BITS - 1` -- confirm against the definition of `GF_BITS`.
#[inline(always)]
pub fn sub_mod(x: GfElement, y: GfElement) -> GfElement {
    let (minuend, subtrahend) = (u32::from(x), u32::from(y));
    let wrapped = minuend.wrapping_sub(subtrahend);
    let borrow = wrapped >> GF_BITS;
    wrapped.wrapping_add(borrow) as GfElement
}

// ======================================================================
// FUNCTIONS - CRATE - Evaluate polynomial

// Kept as a free function rather than a method of 'trait Engine' so
// that the SIMD engines can include it and have it compiled with
// their SIMD features enabled.
//
// Steps, in order:
//   1. `fwht::fwht` over `erasures` with `truncated_size`.
//   2. Multiply every element by the matching entry of the table
//      returned by `tables::initialize_log_walsh()`, folding the
//      32-bit product back into a `GfElement` with `add_mod`.
//   3. `fwht::fwht` over `erasures` again, this time with `GF_ORDER`.
#[inline(always)]
pub(crate) fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) {
    let log_walsh = tables::initialize_log_walsh();

    fwht::fwht(erasures, truncated_size);

    erasures
        .iter_mut()
        .zip(log_walsh.iter())
        .for_each(|(value, weight)| {
            let wide = u32::from(*value) * u32::from(*weight);
            let low = wide as GfElement;
            let high = (wide >> GF_BITS) as GfElement;
            *value = add_mod(low, high);
        });

    fwht::fwht(erasures, GF_ORDER);
}

// ======================================================================
// Engine - PUBLIC

/// Implementation of basic low-level algorithms needed
/// for Reed-Solomon encoding/decoding.
/// Implementation of compute-intensive low-level algorithms needed
/// for Reed-Solomon encoding/decoding. This is the trait you would
/// implement to provide SIMD support for a CPU architecture not
/// already provided.
///
/// These algorithms are not properly documented.
/// These algorithms are not properly documented in this library.
///
/// [`Naive`] engine is provided for those who want to
/// study the source code to understand [`Engine`].
Expand Down Expand Up @@ -187,74 +152,12 @@ pub trait Engine {
// ============================================================
// PROVIDED

/// `x[] ^= y[]`
#[inline(always)]
fn xor(xs: &mut [[u8; 64]], ys: &[[u8; 64]])
where
Self: Sized,
{
debug_assert_eq!(xs.len(), ys.len());

for (x_chunk, y_chunk) in zip(xs.iter_mut(), ys.iter()) {
for (x, y) in zip(x_chunk.iter_mut(), y_chunk.iter()) {
*x ^= y;
}
}
}

/// Evaluate polynomial.
///
/// Default implementation: forwards `erasures` and `truncated_size`
/// unchanged to the free function `eval_poly`.
fn eval_poly(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize)
where
Self: Sized,
{
eval_poly(erasures, truncated_size)
}

/// FFT with `skew_delta = pos + size`.
///
/// Convenience wrapper: forwards `data`, `pos`, `size` and
/// `truncated_size` unchanged to `self.fft` and passes `pos + size`
/// as the final argument.
#[inline(always)]
fn fft_skew_end(
&self,
data: &mut ShardsRefMut,
pos: usize,
size: usize,
truncated_size: usize,
) {
self.fft(data, pos, size, truncated_size, pos + size)
}

/// Formal derivative.
///
/// For every index `i` in `1..data.len()`, `width` is the value of the
/// lowest set bit of `i`, and `Self::xor_within` is called with
/// `(data, i - width, i, width)`.
fn formal_derivative(data: &mut ShardsRefMut)
where
    Self: Sized,
{
    (1..data.len()).for_each(|i| {
        // Lowest set bit of `i`, as a power of two.
        let width = 1usize << i.trailing_zeros();
        Self::xor_within(data, i - width, i, width);
    });
}

/// IFFT with `skew_delta = pos + size`.
///
/// Convenience wrapper: forwards `data`, `pos`, `size` and
/// `truncated_size` unchanged to `self.ifft` and passes `pos + size`
/// as the final argument.
#[inline(always)]
fn ifft_skew_end(
&self,
data: &mut ShardsRefMut,
pos: usize,
size: usize,
truncated_size: usize,
) {
self.ifft(data, pos, size, truncated_size, pos + size)
}

/// `data[x .. x + count] ^= data[y .. y + count]`
///
/// Ranges must not overlap.
#[inline(always)]
fn xor_within(data: &mut ShardsRefMut, x: usize, y: usize, count: usize)
where
Self: Sized,
{
let (xs, ys) = data.flat2_mut(x, y, count);
Self::xor(xs, ys);
utils::eval_poly(erasures, truncated_size)
}
}

Expand Down
25 changes: 12 additions & 13 deletions src/engine/engine_avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ use std::arch::x86::*;
use std::arch::x86_64::*;

use crate::engine::{
self,
tables::{self, Mul128, Multiply128lutT, Skew},
Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER,
utils, Engine, GfElement, ShardsRefMut, GF_MODULUS, GF_ORDER,
};

// ======================================================================
Expand Down Expand Up @@ -238,8 +237,8 @@ impl Avx2 {
// FIRST LAYER

if log_m02 == GF_MODULUS {
Self::xor(s2, s0);
Self::xor(s3, s1);
utils::xor(s2, s0);
utils::xor(s3, s1);
} else {
self.fft_butterfly_partial(s0, s2, log_m02);
self.fft_butterfly_partial(s1, s3, log_m02);
Expand All @@ -248,13 +247,13 @@ impl Avx2 {
// SECOND LAYER

if log_m01 == GF_MODULUS {
Self::xor(s1, s0);
utils::xor(s1, s0);
} else {
self.fft_butterfly_partial(s0, s1, log_m01);
}

if log_m23 == GF_MODULUS {
Self::xor(s3, s2);
utils::xor(s3, s2);
} else {
self.fft_butterfly_partial(s2, s3, log_m23);
}
Expand Down Expand Up @@ -315,7 +314,7 @@ impl Avx2 {
let (x, y) = data.dist2_mut(pos + r, 1);

if log_m == GF_MODULUS {
Self::xor(y, x);
utils::xor(y, x);
} else {
self.fft_butterfly_partial(x, y, log_m)
}
Expand Down Expand Up @@ -379,22 +378,22 @@ impl Avx2 {
// FIRST LAYER

if log_m01 == GF_MODULUS {
Self::xor(s1, s0);
utils::xor(s1, s0);
} else {
self.ifft_butterfly_partial(s0, s1, log_m01);
}

if log_m23 == GF_MODULUS {
Self::xor(s3, s2);
utils::xor(s3, s2);
} else {
self.ifft_butterfly_partial(s2, s3, log_m23);
}

// SECOND LAYER

if log_m02 == GF_MODULUS {
Self::xor(s2, s0);
Self::xor(s3, s1);
utils::xor(s2, s0);
utils::xor(s3, s1);
} else {
self.ifft_butterfly_partial(s0, s2, log_m02);
self.ifft_butterfly_partial(s1, s3, log_m02);
Expand Down Expand Up @@ -451,7 +450,7 @@ impl Avx2 {
if dist < size {
let log_m = self.skew[dist + skew_delta - 1];
if log_m == GF_MODULUS {
Self::xor_within(data, pos + dist, pos, dist);
utils::xor_within(data, pos + dist, pos, dist);
} else {
let (mut a, mut b) = data.split_at_mut(pos + dist);
for i in 0..dist {
Expand All @@ -472,7 +471,7 @@ impl Avx2 {
impl Avx2 {
#[target_feature(enable = "avx2")]
unsafe fn eval_poly_avx2(erasures: &mut [GfElement; GF_ORDER], truncated_size: usize) {
engine::eval_poly(erasures, truncated_size)
utils::eval_poly(erasures, truncated_size)
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/engine/engine_naive.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::engine::{
tables::{self, Exp, Log, Skew},
Engine, GfElement, ShardsRefMut, GF_MODULUS,
utils, Engine, GfElement, ShardsRefMut, GF_MODULUS,
};

// ======================================================================
Expand Down Expand Up @@ -60,7 +60,7 @@ impl Engine for Naive {
if log_m != GF_MODULUS {
self.mul_add(a, b, log_m);
}
Self::xor(b, a);
utils::xor(b, a);
}
r += dist * 2;
}
Expand Down Expand Up @@ -89,7 +89,7 @@ impl Engine for Naive {

// IFFT BUTTERFLY

Self::xor(b, a);
utils::xor(b, a);
if log_m != GF_MODULUS {
self.mul_add(a, b, log_m);
}
Expand Down
Loading

0 comments on commit 4c303f5

Please sign in to comment.