Skip to content

Commit

Permalink
Merge pull request #39 from burrbull/update
Browse files Browse the repository at this point in the history
missed fixes
  • Loading branch information
burrbull authored Jan 24, 2024
2 parents aeafbf7 + 42e2586 commit 9b1ec8f
Show file tree
Hide file tree
Showing 12 changed files with 68 additions and 121 deletions.
11 changes: 4 additions & 7 deletions src/f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,7 @@ pub use u10::{
};

mod u15;
#[rustfmt::skip]
pub use u15::{
erfcf as erfc_u15,
};
pub use u15::erfcf as erfc_u15;

mod u35;
#[rustfmt::skip]
Expand Down Expand Up @@ -434,7 +431,7 @@ fn ilogbkf(mut d: f32) -> i32 {
} else {
d
};
let q = ((d.to_bits() >> 23) & 0xff) as i32;
let q = ((d.to_bits() as i32) >> 23) & 0xff;
if m {
q - (64 + 0x7f)
} else {
Expand All @@ -446,7 +443,7 @@ fn ilogbkf(mut d: f32) -> i32 {
// normalized FP value.
#[inline]
fn ilogb2kf(d: f32) -> i32 {
((d.to_bits() >> 23) & 0xff) as i32 - 0x7f
(((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
}

#[inline]
Expand Down Expand Up @@ -939,7 +936,7 @@ pub fn fmodf(x: f32, y: f32) -> f32 {
if d == 0. {
0.
} else {
f32::from_bits(d.to_bits() - 1)
f32::from_bits(((d.to_bits() as i32) - 1) as u32)
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/f32/u05.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ pub fn sqrtf(mut d: f32) -> f32 {
}

// http://en.wikipedia.org/wiki/Fast_inverse_square_root
let mut x = f32::from_bits(0x_5f37_5a86 - ((d + 1e-45).to_bits() >> 1));
let mut x = f32::from_bits((0x_5f37_5a86 - (((d + 1e-45).to_bits() as i32) >> 1)) as u32);

x *= 1.5 - 0.5 * d * x * x;
x *= 1.5 - 0.5 * d * x * x;
Expand Down
24 changes: 13 additions & 11 deletions src/f32/u10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1011,12 +1011,14 @@ pub fn powf(x: f32, y: f32) -> f32 {
};
result *= if x >= 0. {
1.
} else if !yisint {
f32::NAN
} else if yisodd {
-1.
} else if yisint {
if yisodd {
-1.
} else {
1.
}
} else {
1.
f32::NAN
};

let efx = (fabsfk(x) - 1.).mul_sign(y);
Expand All @@ -1025,12 +1027,12 @@ pub fn powf(x: f32, y: f32) -> f32 {
} else if x.is_nan() || y.is_nan() {
f32::NAN
} else if x.is_infinite() || (x == 0.) {
(if yisodd { x.sign() } else { 1. })
* (if (if x == 0. { -y } else { y }) < 0. {
0.
} else {
f32::INFINITY
})
(if y.is_sign_negative() ^ (x == 0.) {
0.
} else {
f32::INFINITY
})
.mul_sign(if yisodd { x } else { 1. })
} else if y.is_infinite() {
if efx < 0. {
0.
Expand Down
2 changes: 1 addition & 1 deletion src/f32/u35.rs
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ pub fn sqrtf(mut d: f32) -> f32 {
}

// http://en.wikipedia.org/wiki/Fast_inverse_square_root
let mut x = f32::from_bits(0x_5f37_5a86 - ((d + 1e-45).to_bits() >> 1));
let mut x = f32::from_bits((0x_5f37_5a86 - (((d + 1e-45).to_bits() as i32) >> 1)) as u32);

x *= 1.5 - 0.5 * d * x * x;
x *= 1.5 - 0.5 * d * x * x;
Expand Down
6 changes: 3 additions & 3 deletions src/f32x.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ where
{
let o = d.simd_lt(F32x::splat(5.421_010_862_427_522_e-20));
d = o.select(F32x::splat(1.844_674_407_370_955_2_e19) * d, d);
let q = (d.to_bits() >> U32x::splat(23)).cast() & I32x::splat(0xff);
let q = (d.to_bits().cast() >> I32x::splat(23)) & I32x::splat(0xff);
q - o.select(I32x::splat(64 + 0x7f), I32x::splat(0x7f))
}

Expand All @@ -551,8 +551,8 @@ pub(crate) fn ilogb2kf<const N: usize>(d: F32x<N>) -> I32x<N>
where
LaneCount<N>: SupportedLaneCount,
{
let q = d.to_bits();
let mut q = (q >> U32x::splat(23)).cast();
let q = d.to_bits().cast();
let mut q = q >> I32x::splat(23);
q &= I32x::splat(0xff);
q - I32x::splat(0x7f)
}
Expand Down
2 changes: 1 addition & 1 deletion src/f32x/u05.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ where

let mut x = F32x::from_bits(
(I32x::splat(0x_5f37_5a86)
- ((d + F32x::splat(1e-45)).to_bits() >> U32x::splat(1)).cast())
- ((d + F32x::splat(1e-45)).to_bits().cast() >> I32x::splat(1)))
.cast(),
);

Expand Down
12 changes: 3 additions & 9 deletions src/f32x/u10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,15 +1395,9 @@ where
);

result = (x.is_infinite() | x.simd_eq(F32x::ZERO)).select(
yisodd.select(x.sign(), F32x::ONE)
* F32x::from_bits(
!x.simd_eq(F32x::ZERO)
.select(-y, y)
.simd_lt(F32x::ZERO)
.to_int()
.cast::<u32>()
& F32x::INFINITY.to_bits(),
),
(y.is_sign_negative() ^ x.simd_eq(F32x::ZERO))
.select(F32x::ZERO, F32x::INFINITY)
.mul_sign(yisodd.select(x, F32x::ONE)),
result,
);

Expand Down
4 changes: 2 additions & 2 deletions src/f32x/u35.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1124,10 +1124,10 @@ where
{
let e = F32x::from_bits(
U32x::splat(0x_2000_0000)
+ (U32x::splat(0x_7f00_0000) & (d.to_bits() >> U32x::splat(1))),
+ (U32x::splat(0x_7f00_0000) & (d.to_bits().cast() >> I32x::splat(1))),
);
let m = F32x::from_bits(
I32x::splat(0x_3f00_0000) + (I32x::splat(0x_01ff_ffff) & I32x::from_bits(d)),
I32x::splat(0x_3f00_0000) + (I32x::splat(0x_01ff_ffff) & d.to_bits().cast()),
);
let mut x = vrsqrteq_f32(m);
x = vmulq_f32(x, vrsqrtsq_f32(m, vmulq_f32(x, x)));
Expand Down
9 changes: 3 additions & 6 deletions src/f64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,7 @@ pub use u10::{
};

mod u15;
#[rustfmt::skip]
pub use u15::{
erfc as erfc_u15,
};
pub use u15::erfc as erfc_u15;
mod u35;
#[rustfmt::skip]
pub use u35::{
Expand Down Expand Up @@ -876,15 +873,15 @@ pub fn nextafter(x: f64, y: f64) -> f64 {
let mut cxi = x.to_bits() as i64;
let c = (cxi < 0) == (y < x);
if c {
cxi = -(cxi ^ (1 << 63));
cxi = -(cxi ^ i64::MIN);
}

if x != y {
cxi -= 1;
}

if c {
cxi = -(((cxi as u64) ^ (1u64 << 63)) as i64);
cxi = -(cxi ^ i64::MIN);
}

let cxf = f64::from_bits(cxi as u64);
Expand Down
24 changes: 13 additions & 11 deletions src/f64/u10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1282,12 +1282,14 @@ pub fn pow(x: f64, y: f64) -> f64 {
};
result *= if x > 0. {
1.
} else if !yisint {
f64::NAN
} else if yisodd {
-1.
} else if yisint {
if yisodd {
-1.
} else {
1.
}
} else {
1.
f64::NAN
};

let efx = (fabsk(x) - 1.).mul_sign(y);
Expand All @@ -1302,12 +1304,12 @@ pub fn pow(x: f64, y: f64) -> f64 {
f64::INFINITY
}
} else if x.is_infinite() || (x == 0.) {
(if yisodd { x.sign() } else { 1. })
* (if (if x == 0. { -y } else { y }) < 0. {
0.
} else {
f64::INFINITY
})
(if y.is_sign_negative() ^ (x == 0.) {
0.
} else {
f64::INFINITY
})
.mul_sign(if yisodd { x } else { 1. })
} else if x.is_nan() || y.is_nan() {
f64::NAN
} else {
Expand Down
60 changes: 7 additions & 53 deletions src/f64x.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,19 +295,6 @@ where
F64x::from_array(ar)
}

/// Rotates every lane of `i` left by 32 bits and returns the resulting vector.
///
/// For 64-bit lanes (presumably what `I64x` holds — confirm against its
/// definition) this exchanges the upper and lower 32-bit halves of each lane.
#[inline]
fn swap_upper_lower<const N: usize>(i: I64x<N>) -> I64x<N>
where
    LaneCount<N>: SupportedLaneCount,
{
    // Done lane-by-lane on the array form; the vector-level equivalent would be
    // i.rotate_left(I64x::splat(32))
    I64x::from_array(i.to_array().map(|lane| lane.rotate_left(32)))
}

impl<const N: usize> Round for F64x<N>
where
LaneCount<N>: SupportedLaneCount,
Expand Down Expand Up @@ -564,7 +551,7 @@ where
let o = d.simd_lt(F64x::splat(4.909_093_465_297_726_6_e-91));
d = o.select(F64x::splat(2.037_035_976_334_486_e90) * d, d);
let mut q = cast_from_upper(d.to_bits());
q &= Ix::splat((((1u32 << 12) - 1) << 20) as _);
q &= Ix::splat(((1 << 12) - 1) << 20);
q = (q.cast() >> Ux::splat(20)).cast();
q - o.cast().select(Ix::splat(300 + 0x3ff), Ix::splat(0x3ff))
}
Expand Down Expand Up @@ -912,47 +899,14 @@ where
LaneCount<N>: SupportedLaneCount,
{
let x = x.simd_eq(F64x::ZERO).select(F64x::ZERO.mul_sign(y), x);
let mut xi2 = x.to_bits().cast::<i64>();
let xi2 = x.to_bits().cast::<i64>();
let c = x.is_sign_negative() ^ y.simd_ge(x);

let mut t = (xi2 ^ I64x::splat(0x_7fff_ffff_ffff_ffff_u64 as _)) + I64x::splat(1);
t += swap_upper_lower(
I64x::splat(1)
& t.simd_eq(I64x::splat(0x_ffff_ffff_0000_0000_u64 as _))
.to_int(),
);
xi2 = c
.select(F64x::from_bits(t.cast()), F64x::from_bits(xi2.cast()))
.to_bits()
.cast();

xi2 -= (x.simd_ne(y).to_int().cast() & U64x::splat(1)).cast();

xi2 = x
.simd_ne(y)
.select(
F64x::from_bits(
(xi2 + swap_upper_lower(
I64x::splat(0x_ffff_ffff_u64 as _)
& xi2.simd_eq(I64x::splat(0x_ffff_ffff_u64 as _)).to_int(),
))
.cast(),
),
F64x::from_bits(xi2.cast()),
)
.to_bits()
.cast();

let mut t = (xi2 ^ I64x::splat(0x_7fff_ffff_ffff_ffff_u64 as _)) + I64x::splat(1);
t += swap_upper_lower(
I64x::splat(1)
& t.simd_eq(I64x::splat(0x_ffff_ffff_0000_0000_u64 as _))
.to_int(),
);
xi2 = c
.select(F64x::from_bits(t.cast()), F64x::from_bits(xi2.cast()))
.to_bits()
.cast();
let xi2 = c.select(-(xi2 ^ I64x::splat(i64::MIN)), xi2);

let xi2 = x.simd_ne(y).select(xi2 - I64x::splat(1), xi2);

let xi2 = c.select(-(xi2 ^ I64x::splat(i64::MIN)), xi2);

let mut ret = F64x::from_bits(xi2.cast());

Expand Down
33 changes: 17 additions & 16 deletions src/f64x/u10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1222,14 +1222,21 @@ where

let x = F64x::splat(-1.).add_as_doubled(m) / F64x::ONE.add_as_doubled(m);
let x2 = x.0 * x.0;
let x4 = x2 * x2;
let x8 = x4 * x4;

let t = F64x::splat(0.153_207_698_850_270_135_3)
.mla(x2, F64x::splat(0.152_562_905_100_342_871_6))
.mla(x2, F64x::splat(0.181_860_593_293_778_599_6))
.mla(x2, F64x::splat(0.222_221_451_983_938_000_9))
.mla(x2, F64x::splat(0.285_714_293_279_429_931_7))
.mla(x2, F64x::splat(0.399_999_999_963_525_199))
.mla(x2, F64x::splat(0.666_666_666_666_733_354_1));
let t = F64x::poly7(
x2,
x4,
x8,
0.153_207_698_850_270_135_3,
0.152_562_905_100_342_871_6,
0.181_860_593_293_778_599_6,
0.222_221_451_983_938_000_9,
0.285_714_293_279_429_931_7,
0.399_999_999_963_525_199,
0.666_666_666_666_733_354_1,
);

s = s.add_checked(x.scale(F64x::splat(2.)));
s = s.add_checked(x2 * x.0 * t);
Expand Down Expand Up @@ -1799,15 +1806,9 @@ where
);

result = (x.is_infinite() | x.simd_eq(F64x::ZERO)).select(
yisodd.select(x.sign(), F64x::ONE)
* F64x::from_bits(
!x.simd_eq(F64x::ZERO)
.select(-y, y)
.simd_lt(F64x::ZERO)
.to_int()
.cast::<u64>()
& F64x::INFINITY.to_bits(),
),
(y.is_sign_negative() ^ x.simd_eq(F64x::ZERO))
.select(F64x::ZERO, F64x::INFINITY)
.mul_sign(yisodd.select(x, F64x::ONE)),
result,
);

Expand Down

0 comments on commit 9b1ec8f

Please sign in to comment.