Skip to content

Commit

Permalink
Merge pull request #31 from burrbull/v02
Browse files Browse the repository at this point in the history
release 0.2.0
  • Loading branch information
burrbull authored Aug 8, 2022
2 parents 77c9266 + 508a0a8 commit cb1af0f
Show file tree
Hide file tree
Showing 26 changed files with 1,156 additions and 1,161 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Change Log

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

## [v0.2.0] - 2022-08-08

### Changed

- Ported to `core::simd`

## [v0.1.0] - 2022-08-05

[Unreleased]: https://github.com/burrbull/sleef-rs/compare/v0.2.0...HEAD
[v0.2.0]: https://github.com/burrbull/sleef-rs/compare/v0.1.0...v0.2.0
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "sleef"
edition = "2021"
version = "0.1.0"
version = "0.2.0"
authors = ["Andrey Zgarbul <[email protected]>"]
description = "Math functions for SIMD vectors"
keywords = ["simd", "libm", "math"]
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# sleef-rs

Rust port of [Sleef] math library based on [Portable Packed SIMD Vectors]
Rust port of the [Sleef] math library, based on [Portable SIMD Vectors] (a.k.a. `core::simd`)

[Sleef]: https://github.com/shibatch/sleef/
[Portable Packed SIMD Vectors]: https://github.com/rust-lang/packed_simd
[Portable SIMD Vectors]: https://github.com/rust-lang/portable-simd
48 changes: 23 additions & 25 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub trait Round {
}

pub trait MulAdd {
fn mul_add(self, y: Self, z: Self) -> Self;
fn mla(self, y: Self, z: Self) -> Self;
}

pub trait MulSub {
Expand All @@ -100,8 +100,6 @@ where
}

pub trait Sign: MaskType + BitsType {
/* fn is_sign_negative(self) -> Self::Mask;
fn is_sign_positive(self) -> Self::Mask;*/
fn sign_bit(self) -> Self::Bits;
fn sign(self) -> Self;
fn mul_sign(self, other: Self) -> Self;
Expand Down Expand Up @@ -141,25 +139,25 @@ where
{
fn c2v(c: B) -> Self;
fn poly2(x: Self, c1: B, c0: B) -> Self {
x.mul_add(Poly::c2v(c1), Poly::c2v(c0))
x.mla(Poly::c2v(c1), Poly::c2v(c0))
}
fn poly3(x: Self, x2: Self, c2: B, c1: B, c0: B) -> Self {
x2.mul_add(Poly::c2v(c2), x.mul_add(Poly::c2v(c1), Poly::c2v(c0)))
x2.mla(Poly::c2v(c2), x.mla(Poly::c2v(c1), Poly::c2v(c0)))
}
fn poly4(x: Self, x2: Self, c3: B, c2: B, c1: B, c0: B) -> Self {
x2.mul_add(
x.mul_add(Poly::c2v(c3), Poly::c2v(c2)),
x.mul_add(Poly::c2v(c1), Poly::c2v(c0)),
x2.mla(
x.mla(Poly::c2v(c3), Poly::c2v(c2)),
x.mla(Poly::c2v(c1), Poly::c2v(c0)),
)
}
fn poly5(x: Self, x2: Self, x4: Self, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self {
x4.mul_add(Poly::c2v(c4), Poly::poly4(x, x2, c3, c2, c1, c0))
x4.mla(Poly::c2v(c4), Poly::poly4(x, x2, c3, c2, c1, c0))
}
fn poly6(x: Self, x2: Self, x4: Self, c5: B, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self {
x4.mul_add(Poly::poly2(x, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0))
x4.mla(Poly::poly2(x, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0))
}
fn poly7(x: Self, x2: Self, x4: Self, c6: B, c5: B, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self {
x4.mul_add(
x4.mla(
Poly::poly3(x, x2, c6, c5, c4),
Poly::poly4(x, x2, c3, c2, c1, c0),
)
Expand All @@ -177,7 +175,7 @@ where
c1: B,
c0: B,
) -> Self {
x4.mul_add(
x4.mla(
Poly::poly4(x, x2, c7, c6, c5, c4),
Poly::poly4(x, x2, c3, c2, c1, c0),
)
Expand All @@ -197,7 +195,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::c2v(c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -218,7 +216,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly2(x, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -240,7 +238,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly3(x, x2, ca, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -263,7 +261,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly4(x, x2, cb, ca, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -287,7 +285,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly5(x, x2, x4, cc, cb, ca, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -312,7 +310,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly6(x, x2, x4, cd, cc, cb, ca, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -338,7 +336,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly7(x, x2, x4, ce, cd, cc, cb, ca, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand All @@ -365,7 +363,7 @@ where
c1: B,
c0: B,
) -> Self {
x8.mul_add(
x8.mla(
Poly::poly8(x, x2, x4, cf, ce, cd, cc, cb, ca, c9, c8),
Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
)
Expand Down Expand Up @@ -394,7 +392,7 @@ where
c1: B,
c0: B,
) -> Self {
x16.mul_add(
x16.mla(
Poly::c2v(d0),
Poly::poly16(
x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
Expand Down Expand Up @@ -426,7 +424,7 @@ where
c1: B,
c0: B,
) -> Self {
x16.mul_add(
x16.mla(
Poly::poly2(x, d1, d0),
Poly::poly16(
x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
Expand Down Expand Up @@ -459,7 +457,7 @@ where
c1: B,
c0: B,
) -> Self {
x16.mul_add(
x16.mla(
Poly::poly3(x, x2, d2, d1, d0),
Poly::poly16(
x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
Expand Down Expand Up @@ -493,7 +491,7 @@ where
c1: B,
c0: B,
) -> Self {
x16.mul_add(
x16.mla(
Poly::poly4(x, x2, d3, d2, d1, d0),
Poly::poly16(
x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
Expand Down Expand Up @@ -528,7 +526,7 @@ where
c1: B,
c0: B,
) -> Self {
x16.mul_add(
x16.mla(
Poly::poly5(x, x2, x4, d4, d3, d2, d1, d0),
Poly::poly16(
x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
Expand Down
30 changes: 13 additions & 17 deletions src/f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,12 @@ impl BitsType for f32 {

impl MulAdd for f32 {
#[inline]
fn mul_add(self, y: Self, z: Self) -> Self {
self * y + z
fn mla(self, y: Self, z: Self) -> Self {
if cfg!(target_feature = "fma") {
self.mul_add(y, z)
} else {
self * y + z
}
}
}

Expand All @@ -495,14 +499,6 @@ impl Poly<Self> for f32 {
}

impl Sign for f32 {
/* #[inline]
fn is_sign_negative(self) -> Self::Mask {
self.is_sign_negative()
}
#[inline]
fn is_sign_positive(self) -> Self::Mask {
self.is_sign_positive()
}*/
#[inline]
fn sign_bit(self) -> Self::Bits {
self.to_bits() & (1 << 31)
Expand Down Expand Up @@ -680,9 +676,9 @@ fn expk2f(d: Doubled<f32>) -> Doubled<f32> {
s += qf * -L2L_F;

let u = 0.198_096_022_4_e-3_f32
.mul_add(s.0, 0.139_425_648_4_e-2)
.mul_add(s.0, 0.833_345_670_3_e-2)
.mul_add(s.0, 0.416_663_736_1_e-1);
.mla(s.0, 0.139_425_648_4_e-2)
.mla(s.0, 0.833_345_670_3_e-2)
.mla(s.0, 0.416_663_736_1_e-1);

let mut t = s * u + 0.166_666_659_414_234_244_790_680_580_464;
t = s * t + 0.5;
Expand Down Expand Up @@ -717,15 +713,15 @@ fn sinpifk(d: f32) -> Doubled<f32> {
} else {
0.309_384_205_4_e-6
})
.mul_add(
.mla(
s,
if o {
0.359_057_708_e-5
} else {
-0.365_730_738_8_e-4
},
)
.mul_add(
.mla(
s,
if o {
-0.325_991_772_1_e-3
Expand Down Expand Up @@ -787,15 +783,15 @@ fn cospifk(d: f32) -> Doubled<f32> {
} else {
0.309_384_205_4_e-6
})
.mul_add(
.mla(
s,
if o {
0.359_057_708_e-5
} else {
-0.365_730_738_8_e-4
},
)
.mul_add(
.mla(
s,
if o {
-0.325_991_772_1_e-3
Expand Down
44 changes: 22 additions & 22 deletions src/f32/fast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ pub fn sinf(mut d: f32) -> f32 {
let t = d;

let q = rintfk(d * FRAC_1_PI);
d = q.mul_add(-PI, d);
d = q.mla(-PI, d);

let s = d * d;

let mut u = (-0.188_174_817_6_e-3)
.mul_add(s, 0.832_350_272_7_e-2)
.mul_add(s, -0.166_665_136_8);
u = (s * d).mul_add(u, d);
.mla(s, 0.832_350_272_7_e-2)
.mla(s, -0.166_665_136_8);
u = (s * d).mla(u, d);

if ((q as i32) & 1) != 0 {
u = -u;
Expand Down Expand Up @@ -46,15 +46,15 @@ fn test_sinf() {
pub fn cosf(mut d: f32) -> f32 {
let t = d;

let q = rintfk(d.mul_add(FRAC_1_PI, -0.5));
d = q.mul_add(-PI, d - FRAC_PI_2);
let q = rintfk(d.mla(FRAC_1_PI, -0.5));
d = q.mla(-PI, d - FRAC_PI_2);

let s = d * d;

let mut u = (-0.188_174_817_6_e-3)
.mul_add(s, 0.832_350_272_7_e-2)
.mul_add(s, -0.166_665_136_8);
u = (s * d).mul_add(u, d);
.mla(s, 0.832_350_272_7_e-2)
.mla(s, -0.166_665_136_8);
u = (s * d).mla(u, d);

if ((q as i32) & 1) == 0 {
u = -u;
Expand Down Expand Up @@ -96,29 +96,29 @@ fn logk3f(mut d: f32) -> f32 {
let x2 = x * x;

let t = 0.239_282_846_450_805_664_062_5
.mul_add(x2, 0.285_182_118_415_832_519_531_25)
.mul_add(x2, 0.400_005_877_017_974_853_515_625)
.mul_add(x2, 0.666_666_686_534_881_591_796_875)
.mul_add(x2, 2.);
.mla(x2, 0.285_182_118_415_832_519_531_25)
.mla(x2, 0.400_005_877_017_974_853_515_625)
.mla(x2, 0.666_666_686_534_881_591_796_875)
.mla(x2, 2.);

x.mul_add(t, 0.693_147_180_559_945_286_226_764 * (e as f32))
x.mla(t, 0.693_147_180_559_945_286_226_764 * (e as f32))
}

#[inline]
fn expk3f(d: f32) -> f32 {
let q = rintfk(d * R_LN2_F);

let mut s = q.mul_add(-L2U_F, d);
s = q.mul_add(-L2L_F, s);
let mut s = q.mla(-L2U_F, d);
s = q.mla(-L2L_F, s);

let mut u = 0.000_198_527_617_612_853_646_278_381
.mul_add(s, 0.001_393_043_552_525_341_510_772_71)
.mul_add(s, 0.008_333_360_776_305_198_669_433_59)
.mul_add(s, 0.041_666_485_369_205_474_853_515_6)
.mul_add(s, 0.166_666_671_633_720_397_949_219)
.mul_add(s, 0.5);
.mla(s, 0.001_393_043_552_525_341_510_772_71)
.mla(s, 0.008_333_360_776_305_198_669_433_59)
.mla(s, 0.041_666_485_369_205_474_853_515_6)
.mla(s, 0.166_666_671_633_720_397_949_219)
.mla(s, 0.5);

u = (s * s).mul_add(u, s + 1.);
u = (s * s).mla(u, s + 1.);
u = ldexpkf(u, q as i32);

if d < -104. {
Expand Down
Loading

0 comments on commit cb1af0f

Please sign in to comment.