From bf58e1158ab882951168b470219563be265a7302 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Mon, 8 Aug 2022 15:46:34 +0300 Subject: [PATCH 1/5] release 0.2.0 --- Cargo.toml | 2 +- README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 78d6bba..a7e1f82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sleef" edition = "2021" -version = "0.1.0" +version = "0.2.0" authors = ["Andrey Zgarbul "] description = "Math functions for SIMD vectors" keywords = ["simd", "libm", "math"] diff --git a/README.md b/README.md index d393691..f2222e5 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ # sleef-rs -Rust port of [Sleef] math library based on [Portable Packed SIMD Vectors] +Rust port of [Sleef] math library based on [Portable SIMD Vectors] a.k.a. `core::simd` [Sleef]: https://github.com/shibatch/sleef/ -[Portable Packed SIMD Vectors]: https://github.com/rust-lang/packed_simd \ No newline at end of file +[Portable SIMD Vectors]: https://github.com/rust-lang/portable-simd \ No newline at end of file From a766d5d50cc2c6e8ba996fa038187a24b7b9ae39 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Mon, 8 Aug 2022 18:28:57 +0300 Subject: [PATCH 2/5] cleanups --- src/common.rs | 2 -- src/f32.rs | 8 -------- src/f32x.rs | 18 ------------------ src/f32x/u10_impl.rs | 4 ++-- src/f32x/u35_impl.rs | 8 ++++---- src/f64.rs | 8 -------- src/f64x.rs | 20 -------------------- src/f64x/u10_impl.rs | 4 ++-- src/f64x/u35_impl.rs | 8 ++++---- 9 files changed, 12 insertions(+), 68 deletions(-) diff --git a/src/common.rs b/src/common.rs index f15608c..83b8d7c 100644 --- a/src/common.rs +++ b/src/common.rs @@ -100,8 +100,6 @@ where } pub trait Sign: MaskType + BitsType { - /* fn is_sign_negative(self) -> Self::Mask; - fn is_sign_positive(self) -> Self::Mask;*/ fn sign_bit(self) -> Self::Bits; fn sign(self) -> Self; fn mul_sign(self, other: Self) -> Self; diff --git a/src/f32.rs b/src/f32.rs index 
e2cd6ed..2c40a1c 100644 --- a/src/f32.rs +++ b/src/f32.rs @@ -495,14 +495,6 @@ impl Poly for f32 { } impl Sign for f32 { - /* #[inline] - fn is_sign_negative(self) -> Self::Mask { - self.is_sign_negative() - } - #[inline] - fn is_sign_positive(self) -> Self::Mask { - self.is_sign_positive() - }*/ #[inline] fn sign_bit(self) -> Self::Bits { self.to_bits() & (1 << 31) diff --git a/src/f32x.rs b/src/f32x.rs index a259eba..6f2a59e 100644 --- a/src/f32x.rs +++ b/src/f32x.rs @@ -647,25 +647,7 @@ macro_rules! impl_math_f32 { } } - #[inline] - fn vsel_vi2_vf_vf_vi2_vi2(f0: F32x, f1: F32x, x: I32x, y: I32x) -> I32x { - f0.simd_lt(f1).select(x, y) - } - - #[inline] - fn vsel_vi2_vf_vi2(d: F32x, x: I32x) -> I32x { - d.is_sign_negative().to_int() & x - } - impl Sign for F32x { -/* #[inline] - fn is_sign_negative(self) -> Self::Mask { - self.sign_bit().simd_ne(Self::Bits::splat(0)) - } - #[inline] - fn is_sign_positive(self) -> Self::Mask { - !self.is_sign_negative() - }*/ #[inline] fn sign_bit(self) -> Self::Bits { self.to_bits() & NEG_ZERO.to_bits() diff --git a/src/f32x/u10_impl.rs b/src/f32x/u10_impl.rs index 14e52ef..64882c8 100644 --- a/src/f32x/u10_impl.rs +++ b/src/f32x/u10_impl.rs @@ -542,7 +542,7 @@ macro_rules! impl_math_f32_u10 { #[inline] fn atan2kf_u1(y: Doubled, mut x: Doubled) -> Doubled { - let q = vsel_vi2_vf_vf_vi2_vi2(x.0, ZERO, I32x::splat(-2), I32x::splat(0)); + let q = x.0.simd_lt(ZERO).select(I32x::splat(-2), I32x::splat(0)); let p = x.0.simd_lt(ZERO); let r = p.to_int().cast() & NEG_ZERO.to_bits(); x = Doubled::new( @@ -550,7 +550,7 @@ macro_rules! 
impl_math_f32_u10 { F32x::from_bits(x.1.to_bits() ^ r) ); - let q = vsel_vi2_vf_vf_vi2_vi2(x.0, y.0, q + I32x::splat(1), q); + let q = x.0.simd_lt(y.0).select(q + I32x::splat(1), q); let p = x.0.simd_lt(y.0); let s = p.select_doubled(-x, y); let mut t = p.select_doubled(y, x); diff --git a/src/f32x/u35_impl.rs b/src/f32x/u35_impl.rs index 54e245e..816fd6a 100644 --- a/src/f32x/u35_impl.rs +++ b/src/f32x/u35_impl.rs @@ -654,10 +654,10 @@ macro_rules! impl_math_f32_u35 { #[inline] fn atan2kf(y: F32x, x: F32x) -> F32x { - let q = vsel_vi2_vf_vi2(x, I32x::splat(-2)); + let q = x.is_sign_negative().to_int() & I32x::splat(-2); let x = x.abs(); - let q = vsel_vi2_vf_vf_vi2_vi2(x, y, q + I32x::splat(1), q); + let q = x.simd_lt(y).select(q + I32x::splat(1), q); let p = x.simd_lt(y); let s = p.select(-x, y); let mut t = x.simd_max(y); @@ -790,10 +790,10 @@ macro_rules! impl_math_f32_u35 { /// These functions evaluates the arc tangent function of a value in ***a***. /// The error bound of the returned value is `3.5 ULP`. pub fn atanf(d: F32x) -> F32x { - let q = vsel_vi2_vf_vi2(d, I32x::splat(2)); + let q = d.is_sign_negative().to_int() & I32x::splat(2); let s = d.abs(); - let q = vsel_vi2_vf_vf_vi2_vi2(ONE, s, q + I32x::splat(1), q); + let q = ONE.simd_lt(s).select(q + I32x::splat(1), q); let s = ONE.simd_lt(s).select(s.recip(), s); let mut t = s * s; diff --git a/src/f64.rs b/src/f64.rs index c91df15..1d9dfba 100644 --- a/src/f64.rs +++ b/src/f64.rs @@ -509,14 +509,6 @@ impl Poly for f64 { } impl Sign for f64 { - /* #[inline] - fn is_sign_negative(self) -> Self::Mask { - self.is_sign_negative() - } - #[inline] - fn is_sign_positive(self) -> Self::Mask { - self.is_sign_positive() - }*/ #[inline] fn sign_bit(self) -> Self::Bits { self.to_bits() & (1 << 63) diff --git a/src/f64x.rs b/src/f64x.rs index d974000..aa59935 100644 --- a/src/f64x.rs +++ b/src/f64x.rs @@ -608,27 +608,7 @@ macro_rules! impl_math_f64 { } } - // return d0 < d1 ? 
x : y - #[inline] - fn vsel_vi_vd_vd_vi_vi(d0: F64x, d1: F64x, x: Ix, y: Ix) -> Ix { - d0.simd_lt(d1).cast().select(x, y) - } - - // return d0 < 0 ? x : 0 - #[inline] - fn vsel_vi_vd_vi(d: F64x, x: Ix) -> Ix { - d.is_sign_negative().cast::().to_int() & x - } - impl Sign for F64x { -/* #[inline] - fn is_sign_negative(self) -> Self::Mask { - self.sign_bit().simd_ne(Self::Bits::splat(0)) - } - #[inline] - fn is_sign_positive(self) -> Self::Mask { - !self.is_sign_negative() - }*/ #[inline] fn sign_bit(self) -> Self::Bits { self.to_bits() & NEG_ZERO.to_bits() diff --git a/src/f64x/u10_impl.rs b/src/f64x/u10_impl.rs index 05348e5..56316fc 100644 --- a/src/f64x/u10_impl.rs +++ b/src/f64x/u10_impl.rs @@ -711,7 +711,7 @@ macro_rules! impl_math_f64_u10 { #[inline] fn atan2k_u1(y: Doubled, mut x: Doubled) -> Doubled { - let q = vsel_vi_vd_vi(x.0, Ix::splat(-2)); + let q = x.0.is_sign_negative().cast().to_int() & Ix::splat(-2); let p = x.0.simd_lt(ZERO); let b = p.to_int().cast() & NEG_ZERO.to_bits(); x = Doubled::new( @@ -719,7 +719,7 @@ macro_rules! impl_math_f64_u10 { F64x::from_bits(b ^ x.1.to_bits()) ); - let q = vsel_vi_vd_vd_vi_vi(x.0, y.0, q + Ix::splat(1), q); + let q = x.0.simd_lt(y.0).cast().select(q + Ix::splat(1), q); let p = x.0.simd_lt(y.0); let s = p.select_doubled(-x, y); let mut t = p.select_doubled(y, x); diff --git a/src/f64x/u35_impl.rs b/src/f64x/u35_impl.rs index 7cd86a2..8f8c979 100644 --- a/src/f64x/u35_impl.rs +++ b/src/f64x/u35_impl.rs @@ -739,10 +739,10 @@ macro_rules! impl_math_f64_u35 { #[inline] fn atan2k(y: F64x, x: F64x) -> F64x { - let q = vsel_vi_vd_vi(x, Ix::splat(-2)); + let q = x.is_sign_negative().cast().to_int() & Ix::splat(-2); let x = x.abs(); - let q = vsel_vi_vd_vd_vi_vi(x, y, q + Ix::splat(1), q); + let q = x.simd_lt(y).cast().select(q + Ix::splat(1), q); let p = x.simd_lt(y); let s = p.select(-x, y); let mut t = x.simd_max(y); @@ -924,10 +924,10 @@ macro_rules! 
impl_math_f64_u35 { let w = s; }*/ - let q = vsel_vi_vd_vi(s, Ix::splat(2)); + let q = s.is_sign_negative().cast().to_int() & Ix::splat(2); s = s.abs(); - let q = vsel_vi_vd_vd_vi_vi(ONE, s, q + Ix::splat(1), q); + let q = ONE.simd_lt(s).cast().select(q + Ix::splat(1), q); s = ONE.simd_lt(s).select(s.recip(), s); let mut t = s * s; From 4d5aa3c90531e716df08ce1bb432763f8cf058c2 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Mon, 8 Aug 2022 18:34:31 +0300 Subject: [PATCH 3/5] changelog --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..fe6a583 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Change Log + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/) +and this project adheres to [Semantic Versioning](http://semver.org/). + +## [Unreleased] + +## [v0.2.0] - 2022-08-08 + +### Changed + +- Ported to `core::simd` + +## [v0.1.0] - 2022-08-05 + +[Unreleased]: https://github.com/burrbull/sleef-rs/compare/v0.2.0...HEAD +[v0.2.0]: https://github.com/burrbull/sleef-rs/compare/v0.1.0...v0.2.0 From 3ea454c3f142c8a40e4ab25c6d0d03f6a001946d Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Mon, 8 Aug 2022 18:53:54 +0300 Subject: [PATCH 4/5] don't use mul_add if absent --- src/f32x.rs | 24 ++++++++++++++++++------ src/f64x.rs | 24 ++++++++++++++++++------ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/f32x.rs b/src/f32x.rs index 6f2a59e..69ad49a 100644 --- a/src/f32x.rs +++ b/src/f32x.rs @@ -553,24 +553,36 @@ macro_rules! 
impl_math_f32 { impl MulAdd for F32x { #[inline] fn mul_add(self, y: Self, z: Self) -> Self { - use std::simd::{StdFloat}; - ::mul_add(self, y, z) + if cfg!(target_feature = "fma") { + use std::simd::{StdFloat}; + ::mul_add(self, y, z) + } else { + self * y + z + } } } impl MulSub for F32x { #[inline] fn mul_sub(self, y: Self, z: Self) -> Self { - use std::simd::{StdFloat}; - ::mul_add(self, y, -z) + if cfg!(target_feature = "fma") { + use std::simd::{StdFloat}; + ::mul_add(self, y, -z) + } else { + self * y - z + } } } impl NegMulAdd for F32x { #[inline] fn neg_mul_add(self, y: Self, z: Self) -> Self { - use std::simd::{StdFloat}; - ::mul_add(-self, y, z) + if cfg!(target_feature = "fma") { + use std::simd::{StdFloat}; + ::mul_add(-self, y, z) + } else { + -self * y + z + } } } diff --git a/src/f64x.rs b/src/f64x.rs index aa59935..9e07170 100644 --- a/src/f64x.rs +++ b/src/f64x.rs @@ -526,24 +526,36 @@ macro_rules! impl_math_f64 { impl MulAdd for F64x { #[inline] fn mul_add(self, y: Self, z: Self) -> Self { - use std::simd::{StdFloat}; - ::mul_add(self, y, z) + if cfg!(target_feature = "fma") { + use std::simd::{StdFloat}; + ::mul_add(self, y, z) + } else { + self * y + z + } } } impl MulSub for F64x { #[inline] fn mul_sub(self, y: Self, z: Self) -> Self { - use std::simd::{StdFloat}; - ::mul_add(self, y, -z) + if cfg!(target_feature = "fma") { + use std::simd::{StdFloat}; + ::mul_add(self, y, -z) + } else { + self * y - z + } } } impl NegMulAdd for F64x { #[inline] fn neg_mul_add(self, y: Self, z: Self) -> Self { - use std::simd::{StdFloat}; - ::mul_add(-self, y, z) + if cfg!(target_feature = "fma") { + use std::simd::{StdFloat}; + ::mul_add(-self, y, z) + } else { + -self * y + z + } } } From 508a0a81c08a0d2cef310ccda10cb8d5d24eeea7 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Mon, 8 Aug 2022 20:01:58 +0300 Subject: [PATCH 5/5] mul_add -> mla --- src/common.rs | 46 +++--- src/f32.rs | 22 +-- src/f32/fast.rs | 44 +++--- src/f32/u05.rs | 8 +- src/f32/u10.rs 
| 166 +++++++++++----------- src/f32/u15.rs | 8 +- src/f32/u35.rs | 174 +++++++++++------------ src/f32x.rs | 34 ++--- src/f32x/fast_impl.rs | 46 +++--- src/f32x/u05_impl.rs | 8 +- src/f32x/u10_impl.rs | 212 ++++++++++++++-------------- src/f32x/u15_impl.rs | 8 +- src/f32x/u35_impl.rs | 294 +++++++++++++++++++------------------- src/f64.rs | 44 +++--- src/f64/u05.rs | 30 ++-- src/f64/u10.rs | 144 +++++++++---------- src/f64/u15.rs | 34 ++--- src/f64/u35.rs | 218 ++++++++++++++--------------- src/f64x.rs | 30 ++-- src/f64x/u05_impl.rs | 30 ++-- src/f64x/u10_impl.rs | 224 ++++++++++++++--------------- src/f64x/u15_impl.rs | 34 ++--- src/f64x/u35_impl.rs | 318 +++++++++++++++++++++--------------------- 23 files changed, 1092 insertions(+), 1084 deletions(-) diff --git a/src/common.rs b/src/common.rs index 83b8d7c..2bee51d 100644 --- a/src/common.rs +++ b/src/common.rs @@ -76,7 +76,7 @@ pub trait Round { } pub trait MulAdd { - fn mul_add(self, y: Self, z: Self) -> Self; + fn mla(self, y: Self, z: Self) -> Self; } pub trait MulSub { @@ -139,25 +139,25 @@ where { fn c2v(c: B) -> Self; fn poly2(x: Self, c1: B, c0: B) -> Self { - x.mul_add(Poly::c2v(c1), Poly::c2v(c0)) + x.mla(Poly::c2v(c1), Poly::c2v(c0)) } fn poly3(x: Self, x2: Self, c2: B, c1: B, c0: B) -> Self { - x2.mul_add(Poly::c2v(c2), x.mul_add(Poly::c2v(c1), Poly::c2v(c0))) + x2.mla(Poly::c2v(c2), x.mla(Poly::c2v(c1), Poly::c2v(c0))) } fn poly4(x: Self, x2: Self, c3: B, c2: B, c1: B, c0: B) -> Self { - x2.mul_add( - x.mul_add(Poly::c2v(c3), Poly::c2v(c2)), - x.mul_add(Poly::c2v(c1), Poly::c2v(c0)), + x2.mla( + x.mla(Poly::c2v(c3), Poly::c2v(c2)), + x.mla(Poly::c2v(c1), Poly::c2v(c0)), ) } fn poly5(x: Self, x2: Self, x4: Self, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self { - x4.mul_add(Poly::c2v(c4), Poly::poly4(x, x2, c3, c2, c1, c0)) + x4.mla(Poly::c2v(c4), Poly::poly4(x, x2, c3, c2, c1, c0)) } fn poly6(x: Self, x2: Self, x4: Self, c5: B, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self { - x4.mul_add(Poly::poly2(x, c5, 
c4), Poly::poly4(x, x2, c3, c2, c1, c0)) + x4.mla(Poly::poly2(x, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0)) } fn poly7(x: Self, x2: Self, x4: Self, c6: B, c5: B, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self { - x4.mul_add( + x4.mla( Poly::poly3(x, x2, c6, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0), ) @@ -175,7 +175,7 @@ where c1: B, c0: B, ) -> Self { - x4.mul_add( + x4.mla( Poly::poly4(x, x2, c7, c6, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0), ) @@ -195,7 +195,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::c2v(c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -216,7 +216,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly2(x, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -238,7 +238,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly3(x, x2, ca, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -261,7 +261,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly4(x, x2, cb, ca, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -285,7 +285,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly5(x, x2, x4, cc, cb, ca, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -310,7 +310,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly6(x, x2, x4, cd, cc, cb, ca, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -336,7 +336,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly7(x, x2, x4, ce, cd, cc, cb, ca, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -363,7 +363,7 @@ where c1: B, c0: B, ) -> Self { - x8.mul_add( + x8.mla( Poly::poly8(x, x2, x4, cf, ce, cd, cc, cb, ca, c9, c8), Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0), ) @@ -392,7 +392,7 @@ where c1: B, c0: B, ) -> Self { - x16.mul_add( + x16.mla( Poly::c2v(d0), Poly::poly16( x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, 
c4, c3, c2, c1, c0, @@ -424,7 +424,7 @@ where c1: B, c0: B, ) -> Self { - x16.mul_add( + x16.mla( Poly::poly2(x, d1, d0), Poly::poly16( x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0, @@ -457,7 +457,7 @@ where c1: B, c0: B, ) -> Self { - x16.mul_add( + x16.mla( Poly::poly3(x, x2, d2, d1, d0), Poly::poly16( x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0, @@ -491,7 +491,7 @@ where c1: B, c0: B, ) -> Self { - x16.mul_add( + x16.mla( Poly::poly4(x, x2, d3, d2, d1, d0), Poly::poly16( x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0, @@ -526,7 +526,7 @@ where c1: B, c0: B, ) -> Self { - x16.mul_add( + x16.mla( Poly::poly5(x, x2, x4, d4, d3, d2, d1, d0), Poly::poly16( x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0, diff --git a/src/f32.rs b/src/f32.rs index 2c40a1c..52f1d4b 100644 --- a/src/f32.rs +++ b/src/f32.rs @@ -483,8 +483,12 @@ impl BitsType for f32 { impl MulAdd for f32 { #[inline] - fn mul_add(self, y: Self, z: Self) -> Self { - self * y + z + fn mla(self, y: Self, z: Self) -> Self { + if cfg!(target_feature = "fma") { + self.mul_add(y, z) + } else { + self * y + z + } } } @@ -672,9 +676,9 @@ fn expk2f(d: Doubled) -> Doubled { s += qf * -L2L_F; let u = 0.198_096_022_4_e-3_f32 - .mul_add(s.0, 0.139_425_648_4_e-2) - .mul_add(s.0, 0.833_345_670_3_e-2) - .mul_add(s.0, 0.416_663_736_1_e-1); + .mla(s.0, 0.139_425_648_4_e-2) + .mla(s.0, 0.833_345_670_3_e-2) + .mla(s.0, 0.416_663_736_1_e-1); let mut t = s * u + 0.166_666_659_414_234_244_790_680_580_464; t = s * t + 0.5; @@ -709,7 +713,7 @@ fn sinpifk(d: f32) -> Doubled { } else { 0.309_384_205_4_e-6 }) - .mul_add( + .mla( s, if o { 0.359_057_708_e-5 @@ -717,7 +721,7 @@ fn sinpifk(d: f32) -> Doubled { -0.365_730_738_8_e-4 }, ) - .mul_add( + .mla( s, if o { -0.325_991_772_1_e-3 @@ -779,7 +783,7 @@ fn cospifk(d: f32) -> Doubled { } else { 0.309_384_205_4_e-6 }) - .mul_add( + .mla( s, if o { 
0.359_057_708_e-5 @@ -787,7 +791,7 @@ fn cospifk(d: f32) -> Doubled { -0.365_730_738_8_e-4 }, ) - .mul_add( + .mla( s, if o { -0.325_991_772_1_e-3 diff --git a/src/f32/fast.rs b/src/f32/fast.rs index 4c6251b..656f076 100644 --- a/src/f32/fast.rs +++ b/src/f32/fast.rs @@ -9,14 +9,14 @@ pub fn sinf(mut d: f32) -> f32 { let t = d; let q = rintfk(d * FRAC_1_PI); - d = q.mul_add(-PI, d); + d = q.mla(-PI, d); let s = d * d; let mut u = (-0.188_174_817_6_e-3) - .mul_add(s, 0.832_350_272_7_e-2) - .mul_add(s, -0.166_665_136_8); - u = (s * d).mul_add(u, d); + .mla(s, 0.832_350_272_7_e-2) + .mla(s, -0.166_665_136_8); + u = (s * d).mla(u, d); if ((q as i32) & 1) != 0 { u = -u; @@ -46,15 +46,15 @@ fn test_sinf() { pub fn cosf(mut d: f32) -> f32 { let t = d; - let q = rintfk(d.mul_add(FRAC_1_PI, -0.5)); - d = q.mul_add(-PI, d - FRAC_PI_2); + let q = rintfk(d.mla(FRAC_1_PI, -0.5)); + d = q.mla(-PI, d - FRAC_PI_2); let s = d * d; let mut u = (-0.188_174_817_6_e-3) - .mul_add(s, 0.832_350_272_7_e-2) - .mul_add(s, -0.166_665_136_8); - u = (s * d).mul_add(u, d); + .mla(s, 0.832_350_272_7_e-2) + .mla(s, -0.166_665_136_8); + u = (s * d).mla(u, d); if ((q as i32) & 1) == 0 { u = -u; @@ -96,29 +96,29 @@ fn logk3f(mut d: f32) -> f32 { let x2 = x * x; let t = 0.239_282_846_450_805_664_062_5 - .mul_add(x2, 0.285_182_118_415_832_519_531_25) - .mul_add(x2, 0.400_005_877_017_974_853_515_625) - .mul_add(x2, 0.666_666_686_534_881_591_796_875) - .mul_add(x2, 2.); + .mla(x2, 0.285_182_118_415_832_519_531_25) + .mla(x2, 0.400_005_877_017_974_853_515_625) + .mla(x2, 0.666_666_686_534_881_591_796_875) + .mla(x2, 2.); - x.mul_add(t, 0.693_147_180_559_945_286_226_764 * (e as f32)) + x.mla(t, 0.693_147_180_559_945_286_226_764 * (e as f32)) } #[inline] fn expk3f(d: f32) -> f32 { let q = rintfk(d * R_LN2_F); - let mut s = q.mul_add(-L2U_F, d); - s = q.mul_add(-L2L_F, s); + let mut s = q.mla(-L2U_F, d); + s = q.mla(-L2L_F, s); let mut u = 0.000_198_527_617_612_853_646_278_381 - .mul_add(s, 
0.001_393_043_552_525_341_510_772_71) - .mul_add(s, 0.008_333_360_776_305_198_669_433_59) - .mul_add(s, 0.041_666_485_369_205_474_853_515_6) - .mul_add(s, 0.166_666_671_633_720_397_949_219) - .mul_add(s, 0.5); + .mla(s, 0.001_393_043_552_525_341_510_772_71) + .mla(s, 0.008_333_360_776_305_198_669_433_59) + .mla(s, 0.041_666_485_369_205_474_853_515_6) + .mla(s, 0.166_666_671_633_720_397_949_219) + .mla(s, 0.5); - u = (s * s).mul_add(u, s + 1.); + u = (s * s).mla(u, s + 1.); u = ldexpkf(u, q as i32); if d < -104. { diff --git a/src/f32/u05.rs b/src/f32/u05.rs index 3bf322e..3df4779 100644 --- a/src/f32/u05.rs +++ b/src/f32/u05.rs @@ -20,8 +20,8 @@ pub fn sincospif(d: f32) -> (f32, f32) { // let u = 0.309_384_205_4_e-6_f32 - .mul_add(s, -0.365_730_738_8_e-4) - .mul_add(s, 0.249_039_358_5_e-2); + .mla(s, -0.365_730_738_8_e-4) + .mla(s, 0.249_039_358_5_e-2); let mut x = u * s + Doubled::new( -0.080_745_510_756_969_451_904, @@ -37,8 +37,8 @@ pub fn sincospif(d: f32) -> (f32, f32) { let mut rsin = if d.is_neg_zero() { -0. 
} else { f32::from(x) }; let u = (-0.243_061_180_1_e-7_f32) - .mul_add(s, 0.359_057_708_e-5) - .mul_add(s, -0.325_991_772_1_e-3); + .mla(s, 0.359_057_708_e-5) + .mla(s, -0.325_991_772_1_e-3); x = u * s + Doubled::new( 0.015_854_343_771_934_509_277, diff --git a/src/f32/u10.rs b/src/f32/u10.rs index b810756..e15c81a 100644 --- a/src/f32/u10.rs +++ b/src/f32/u10.rs @@ -13,7 +13,7 @@ pub fn sinf(d: f32) -> f32 { if fabsfk(d) < TRIGRANGEMAX2_F { let qf = rintfk(d * FRAC_1_PI); q = qf as i32; - let u = qf.mul_add(-PI_A2_F, d); + let u = qf.mla(-PI_A2_F, d); s = u.add_as_doubled(qf * (-PI_B2_F)); s.add_checked_assign(qf * (-PI_C2_F)); } else { @@ -35,8 +35,8 @@ pub fn sinf(d: f32) -> f32 { s = s.square(); let mut u = 2.608_315_980_978_659_354_150_3_e-6_f32 - .mul_add(s.0, -0.000_198_106_907_191_686_332_225_8) - .mul_add(s.0, 0.008_333_078_585_565_090_179_443_36); + .mla(s.0, -0.000_198_106_907_191_686_332_225_8) + .mla(s.0, 0.008_333_078_585_565_090_179_443_36); let x = (1.).add_checked((-0.166_666_597_127_914_428_710_938).add_checked_as_doubled(u * s.0) * s); @@ -68,7 +68,7 @@ pub fn cosf(mut d: f32) -> f32 { if fabsfk(d) < TRIGRANGEMAX2_F { d = fabsfk(d); - let dq = rintfk(d * FRAC_1_PI - 0.5).mul_add(2., 1.); + let dq = rintfk(d * FRAC_1_PI - 0.5).mla(2., 1.); q = dq as i32; s = d.add_as_doubled(dq * (-PI_A2_F * 0.5)); s += dq * (-PI_B2_F * 0.5); @@ -92,8 +92,8 @@ pub fn cosf(mut d: f32) -> f32 { s = s.square(); let mut u = 2.608_315_980_978_659_354_150_3_e-6_f32 - .mul_add(s.0, -0.000_198_106_907_191_686_332_225_8) - .mul_add(s.0, 0.008_333_078_585_565_090_179_443_36); + .mla(s.0, -0.000_198_106_907_191_686_332_225_8) + .mla(s.0, 0.008_333_078_585_565_090_179_443_36); let x = (1.).add_checked((-0.166_666_597_127_914_428_710_938).add_checked_as_doubled(u * s.0) * s); @@ -126,7 +126,7 @@ pub fn sincosf(d: f32) -> (f32, f32) { if fabsfk(d) < TRIGRANGEMAX2_F { let qf = rintfk(d * FRAC_2_PI); q = qf as i32; - let u = qf.mul_add(-PI_A2_F * 0.5, d); + let u = 
qf.mla(-PI_A2_F * 0.5, d); s = u.add_as_doubled(qf * (-PI_B2_F * 0.5)); s.add_checked_assign(qf * (-PI_C2_F * 0.5)); } else { @@ -142,8 +142,8 @@ pub fn sincosf(d: f32) -> (f32, f32) { s.0 = s.square_as_f(); let u = (-0.000_195_169_282_960_705_459_117_889_f32) - .mul_add(s.0, 0.008_332_157_507_538_795_471_191_41) - .mul_add(s.0, -0.166_666_537_523_269_653_320_312) + .mla(s.0, 0.008_332_157_507_538_795_471_191_41) + .mla(s.0, -0.166_666_537_523_269_653_320_312) * s.0 * t.0; @@ -151,10 +151,10 @@ pub fn sincosf(d: f32) -> (f32, f32) { let mut rsin = if d.is_neg_zero() { -0. } else { f32::from(x) }; let u = (-2.718_118_423_672_422_068_193_55_e-7_f32) - .mul_add(s.0, 2.479_904_469_510_074_704_885_48_e-5) - .mul_add(s.0, -0.001_388_887_874_782_085_418_701_17) - .mul_add(s.0, 0.041_666_664_183_139_801_025_390_6) - .mul_add(s.0, -0.5); + .mla(s.0, 2.479_904_469_510_074_704_885_48_e-5) + .mla(s.0, -0.001_388_887_874_782_085_418_701_17) + .mla(s.0, 0.041_666_664_183_139_801_025_390_6) + .mla(s.0, -0.5); x = (1.).add_checked(s.0.mul_as_doubled(u)); let mut rcos = f32::from(x); @@ -195,7 +195,7 @@ pub fn tanf(d: f32) -> f32 { if fabsfk(d) < TRIGRANGEMAX2_F { let qf = rintfk(d * FRAC_2_PI); q = qf as i32; - let u = qf.mul_add(-PI_A2_F * 0.5, d); + let u = qf.mla(-PI_A2_F * 0.5, d); s = u.add_as_doubled(qf * (-PI_B2_F * 0.5)); s.add_checked_assign(qf * (-PI_C2_F * 0.5)); } else { @@ -215,10 +215,10 @@ pub fn tanf(d: f32) -> f32 { s = s.square().normalize(); let u = 0.004_466_364_625_841_379_165_649_41_f32 - .mul_add(s.0, -8.392_018_207_814_544_439_315_8_e-5) - .mul_add(s.0, 0.010_963_924_229_145_050_048_828_1) - .mul_add(s.0, 0.021_236_030_384_898_185_729_980_5) - .mul_add(s.0, 0.054_068_714_380_264_282_226_562_5); + .mla(s.0, -8.392_018_207_814_544_439_315_8_e-5) + .mla(s.0, 0.010_963_924_229_145_050_048_828_1) + .mla(s.0, 0.021_236_030_384_898_185_729_980_5) + .mla(s.0, 0.054_068_714_380_264_282_226_562_5); let mut x = 
(0.133_325_666_189_193_725_585_938).add_checked_as_doubled(u * s.0); x = (1.).add_checked((0.333_333_611_488_342_285_156_25).add_checked(s * x) * s); @@ -259,13 +259,13 @@ fn atan2kf_u1(mut y: Doubled, mut x: Doubled) -> Doubled { let mut t = s.square().normalize(); let u = (-0.001_763_979_089_446_365_833_282_47_f32) - .mul_add(t.0, 0.010_790_090_076_625_347_137_451_2) - .mul_add(t.0, -0.030_956_460_162_997_245_788_574_2) - .mul_add(t.0, 0.057_736_508_548_259_735_107_421_9) - .mul_add(t.0, -0.083_895_072_340_965_270_996_093_8) - .mul_add(t.0, 0.109_463_557_600_975_036_621_094) - .mul_add(t.0, -0.142_626_821_994_781_494_140_625) - .mul_add(t.0, 0.199_983_194_470_405_578_613_281); + .mla(t.0, 0.010_790_090_076_625_347_137_451_2) + .mla(t.0, -0.030_956_460_162_997_245_788_574_2) + .mla(t.0, 0.057_736_508_548_259_735_107_421_9) + .mla(t.0, -0.083_895_072_340_965_270_996_093_8) + .mla(t.0, 0.109_463_557_600_975_036_621_094) + .mla(t.0, -0.142_626_821_994_781_494_140_625) + .mla(t.0, 0.199_983_194_470_405_578_613_281); t = t * (-0.333_332_866_430_282_592_773_438).add_checked_as_doubled(u * t.0); t = s * (1.).add_checked(t); @@ -352,10 +352,10 @@ pub fn asinf(d: f32) -> f32 { }; let u = 0.419_745_482_5_e-1_f32 - .mul_add(x2, 0.242_404_602_5_e-1) - .mul_add(x2, 0.454_742_386_9_e-1) - .mul_add(x2, 0.749_502_927_1_e-1) - .mul_add(x2, 0.166_667_729_6) + .mla(x2, 0.242_404_602_5_e-1) + .mla(x2, 0.454_742_386_9_e-1) + .mla(x2, 0.749_502_927_1_e-1) + .mla(x2, 0.166_667_729_6) * x2 * x.0; @@ -388,10 +388,10 @@ pub fn acosf(d: f32) -> f32 { }; let u = 0.419_745_482_5_e-1_f32 - .mul_add(x2, 0.242_404_602_5_e-1) - .mul_add(x2, 0.454_742_386_9_e-1) - .mul_add(x2, 0.749_502_927_1_e-1) - .mul_add(x2, 0.166_667_729_6) + .mla(x2, 0.242_404_602_5_e-1) + .mla(x2, 0.454_742_386_9_e-1) + .mla(x2, 0.749_502_927_1_e-1) + .mla(x2, 0.166_667_729_6) * x.0 * x2; @@ -518,9 +518,9 @@ fn logk2f(d: Doubled) -> Doubled { let x2 = x.square(); let t = 0.239_282_846_450_805_664_062_5_f32 - .mul_add(x2.0, 
0.285_182_118_415_832_519_531_25) - .mul_add(x2.0, 0.400_005_877_017_974_853_515_625) - .mul_add(x2.0, 0.666_666_686_534_881_591_796_875); + .mla(x2.0, 0.285_182_118_415_832_519_531_25) + .mla(x2.0, 0.400_005_877_017_974_853_515_625) + .mla(x2.0, 0.666_666_686_534_881_591_796_875); (D_LN2 * (e as f32)) + x.scale(2.) + x2 * x * t } @@ -656,8 +656,8 @@ pub fn logf(mut d: f32) -> f32 { let x2 = x.0 * x.0; let t = 0.302_729_487_4_f32 - .mul_add(x2, 0.399_610_817_4) - .mul_add(x2, 0.666_669_488); + .mla(x2, 0.399_610_817_4) + .mla(x2, 0.666_669_488); let s = (D_LN2 * (e as f32)) .add_checked(x.scale(2.)) @@ -700,8 +700,8 @@ pub fn log10f(mut d: f32) -> f32 { let x2 = x.0 * x.0; let t = 0.131_428_986_8_f32 - .mul_add(x2, 0.173_549_354_1) - .mul_add(x2, 0.289_530_962_7); + .mla(x2, 0.173_549_354_1) + .mla(x2, 0.289_530_962_7); let s = (Doubled::new(0.301_030_01, -1.432_098_889_e-8) * (e as f32)) .add_checked(x * Doubled::new(0.868_588_984, -2.170_757_285_e-8)) @@ -744,8 +744,8 @@ pub fn log2f(mut d: f32) -> f32 { let x2 = x.0 * x.0; let t = 0.437_455_028_3_f32 - .mul_add(x2, 0.576_479_017_7) - .mul_add(x2, 0.961_801_290_512); + .mla(x2, 0.576_479_017_7) + .mla(x2, 0.961_801_290_512); let mut s = (e as f32) + x * Doubled::new(2.885_390_043_258_666_992_2, 3.273_447_448_356_848_861_6_e-8); @@ -782,7 +782,7 @@ pub fn log1pf(d: f32) -> f32 { let mut e = ilogb2kf(dp1 * (1. 
/ 0.75)); let t = ldexp3kf(1., -e); - let m = d.mul_add(t, t - 1.); + let m = d.mla(t, t - 1.); if o { e -= 64; @@ -792,8 +792,8 @@ pub fn log1pf(d: f32) -> f32 { let x2 = x.0 * x.0; let t = 0.302_729_487_4_f32 - .mul_add(x2, 0.399_610_817_4) - .mul_add(x2, 0.666_669_488); + .mla(x2, 0.399_610_817_4) + .mla(x2, 0.666_669_488); let s = (crate::f32::D_LN2 * (e as f32)) .add_checked(x.scale(2.)) @@ -824,15 +824,15 @@ fn test_log1pf() { pub fn expf(d: f32) -> f32 { let qf = rintfk(d * R_LN2_F); let q = qf as i32; - let s = qf.mul_add(-L2U_F, d); - let s = qf.mul_add(-L2L_F, s); + let s = qf.mla(-L2U_F, d); + let s = qf.mla(-L2L_F, s); let mut u = 0.000_198_527_617_612_853_646_278_381_f32 - .mul_add(s, 0.001_393_043_552_525_341_510_772_71) - .mul_add(s, 0.008_333_360_776_305_198_669_433_59) - .mul_add(s, 0.041_666_485_369_205_474_853_515_6) - .mul_add(s, 0.166_666_671_633_720_397_949_219) - .mul_add(s, 0.5); + .mla(s, 0.001_393_043_552_525_341_510_772_71) + .mla(s, 0.008_333_360_776_305_198_669_433_59) + .mla(s, 0.041_666_485_369_205_474_853_515_6) + .mla(s, 0.166_666_671_633_720_397_949_219) + .mla(s, 0.5); u = s * s * u + s + 1.; @@ -858,15 +858,15 @@ pub fn exp10f(d: f32) -> f32 { let qf = rintfk(d * LOG10_2_F); let q = qf as i32; - let s = qf.mul_add(-L10U_F, d); - let s = qf.mul_add(-L10L_F, s); + let s = qf.mla(-L10U_F, d); + let s = qf.mla(-L10L_F, s); let mut u = 0.680_255_591_9_e-1 - .mul_add(s, 0.207_808_032_6) - .mul_add(s, 0.539_390_385_2) - .mul_add(s, 0.117_124_533_7_e+1) - .mul_add(s, 0.203_467_869_8_e+1) - .mul_add(s, 0.265_094_900_1_e+1); + .mla(s, 0.207_808_032_6) + .mla(s, 0.539_390_385_2) + .mla(s, 0.117_124_533_7_e+1) + .mla(s, 0.203_467_869_8_e+1) + .mla(s, 0.265_094_900_1_e+1); let x = Doubled::new(2.3025851249694824219, -3.1705172516493593157e-08).add_checked(u * s); u = (1.).add_checked(x * s).normalize().0; @@ -916,11 +916,11 @@ pub fn exp2f(d: f32) -> f32 { let s = d - qf; let mut u = 0.153_592_089_2_e-3_f32 - .mul_add(s, 0.133_926_270_1_e-2) 
- .mul_add(s, 0.961_838_476_4_e-2) - .mul_add(s, 0.555_034_726_9_e-1) - .mul_add(s, 0.240_226_447_6) - .mul_add(s, 0.693_147_182_5); + .mla(s, 0.133_926_270_1_e-2) + .mla(s, 0.961_838_476_4_e-2) + .mla(s, 0.555_034_726_9_e-1) + .mla(s, 0.240_226_447_6) + .mla(s, 0.693_147_182_5); u = (1.).add_checked(u.mul_as_doubled(s)).normalize().0; if d >= 128. { @@ -955,8 +955,8 @@ fn logkf(mut d: f32) -> Doubled { let x2 = x.square(); let t = 0.240_320_354_700_088_500_976_562_f32 - .mul_add(x2.0, 0.285_112_679_004_669_189_453_125) - .mul_add(x2.0, 0.400_007_992_982_864_379_882_812); + .mla(x2.0, 0.285_112_679_004_669_189_453_125) + .mla(x2.0, 0.400_007_992_982_864_379_882_812); let c = Doubled::new( 0.666_666_626_930_236_816_406_25, 3.691_838_612_596_143_320_843_11_e-9, @@ -978,10 +978,10 @@ fn expkf(d: Doubled) -> f32 { s = s.normalize(); let u = 0.001_363_246_468_827_128_410_339_36_f32 - .mul_add(s.0, 0.008_365_969_173_610_210_418_701_17) - .mul_add(s.0, 0.041_671_082_377_433_776_855_468_8) - .mul_add(s.0, 0.166_665_524_244_308_471_679_688) - .mul_add(s.0, 0.499_999_850_988_388_061_523_438); + .mla(s.0, 0.008_365_969_173_610_210_418_701_17) + .mla(s.0, 0.041_671_082_377_433_776_855_468_8) + .mla(s.0, 0.166_665_524_244_308_471_679_688) + .mla(s.0, 0.499_999_850_988_388_061_523_438); let mut t = s.add_checked(s.square() * u); @@ -1082,11 +1082,11 @@ pub fn cbrtf(mut d: f32) -> f32 { d = fabsfk(d); let mut x = (-0.601_564_466_953_277_587_890_625_f32) - .mul_add(d, 2.820_889_234_542_846_679_687_5) - .mul_add(d, -5.532_182_216_644_287_109_375) - .mul_add(d, 5.898_262_500_762_939_453_125) - .mul_add(d, -3.809_541_702_270_507_812_5) - .mul_add(d, 2.224_125_623_703_002_929_687_5); + .mla(d, 2.820_889_234_542_846_679_687_5) + .mla(d, -5.532_182_216_644_287_109_375) + .mla(d, 5.898_262_500_762_939_453_125) + .mla(d, -3.809_541_702_270_507_812_5) + .mla(d, 2.224_125_623_703_002_929_687_5); let mut y = x * x; y = y * y; @@ -1153,7 +1153,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { } 
else { 0.110_248_955_e-3 }) - .mul_add( + .mla( t, if o2 { -5.171_790_908_260_592_193_293_944_22_e-5 @@ -1163,7 +1163,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { 0.816_001_993_4_e-4 }, ) - .mul_add( + .mla( t, if o2 { -0.000_592_166_437_353_693_882_857_342_347 @@ -1173,7 +1173,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { 0.152_846_885_6_e-3 }, ) - .mul_add( + .mla( t, if o2 { 6.972_813_758_365_857_774_037_435_39_e-5 @@ -1183,7 +1183,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { -0.235_506_871_8_e-3 }, ) - .mul_add( + .mla( t, if o2 { 0.000_784_039_221_720_066_627_493_314_301 @@ -1193,7 +1193,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { 0.496_224_209_2_e-3 }, ) - .mul_add( + .mla( t, if o2 { -0.000_229_472_093_621_399_176_949_318_732 @@ -1203,7 +1203,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { -0.119_348_801_7_e-2 }, ) - .mul_add( + .mla( t, if o2 { -0.002_681_327_160_493_827_160_473_958_490 @@ -1213,7 +1213,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { 0.289_159_943_3_e-2 }, ) - .mul_add( + .mla( t, if o2 { 0.003_472_222_222_222_222_222_175_164_840 @@ -1223,7 +1223,7 @@ fn gammafk(a: f32) -> (Doubled, Doubled) { -0.738_545_181_2_e-2 }, ) - .mul_add( + .mla( t, if o2 { 0.083_333_333_333_333_333_335_592_087_900 diff --git a/src/f32/u15.rs b/src/f32/u15.rs index 18e3932..f43c276 100644 --- a/src/f32/u15.rs +++ b/src/f32/u15.rs @@ -27,7 +27,7 @@ pub fn erfcf(mut a: f32) -> f32 { } else { 0.111_534_416_7_e+1 } - .mul_add( + .mla( u.0, if o0 { 0.600_016_617_7_e-3 @@ -39,7 +39,7 @@ pub fn erfcf(mut a: f32) -> f32 { -0.945_490_419_9 }, ) - .mul_add( + .mla( u.0, if o0 { -0.166_570_360_3_e-2 @@ -51,7 +51,7 @@ pub fn erfcf(mut a: f32) -> f32 { -0.366_725_951_4 }, ) - .mul_add( + .mla( u.0, if o0 { 0.179_515_627_7_e-3 @@ -63,7 +63,7 @@ pub fn erfcf(mut a: f32) -> f32 { 0.715_566_337_1 }, ) - .mul_add( + .mla( u.0, if o0 { 0.191_410_612_3_e-1 diff --git a/src/f32/u35.rs b/src/f32/u35.rs index 4465f44..32f7ba0 100644 --- a/src/f32/u35.rs +++ b/src/f32/u35.rs 
@@ -13,16 +13,16 @@ pub fn sinf(mut d: f32) -> f32 { if fabsfk(d) < TRIGRANGEMAX2_F { let qf = rintfk(d * FRAC_1_PI); q = qf as i32; - d = qf.mul_add(-PI_A2_F, d); - d = qf.mul_add(-PI_B2_F, d); - d = qf.mul_add(-PI_C2_F, d); + d = qf.mla(-PI_A2_F, d); + d = qf.mla(-PI_B2_F, d); + d = qf.mla(-PI_C2_F, d); } else if fabsfk(d) < TRIGRANGEMAX_F { let qf = rintfk(d * FRAC_1_PI); q = qf as i32; - d = qf.mul_add(-PI_A_F, d); - d = qf.mul_add(-PI_B_F, d); - d = qf.mul_add(-PI_C_F, d); - d = qf.mul_add(-PI_D_F, d); + d = qf.mla(-PI_A_F, d); + d = qf.mla(-PI_B_F, d); + d = qf.mla(-PI_C_F, d); + d = qf.mla(-PI_D_F, d); } else { let (mut dfidf, dfii) = rempif(t); q = ((dfii & 3) * 2 + ((dfidf.0 > 0.) as i32) + 1) >> 2; @@ -45,14 +45,14 @@ pub fn sinf(mut d: f32) -> f32 { } let u = 2.608_315_980_978_659_354_150_3_e-6_f32 - .mul_add(s, -0.000_198_106_907_191_686_332_225_8) - .mul_add(s, 0.008_333_078_585_565_090_179_443_36) - .mul_add(s, -0.166_666_597_127_914_428_710_938); + .mla(s, -0.000_198_106_907_191_686_332_225_8) + .mla(s, 0.008_333_078_585_565_090_179_443_36) + .mla(s, -0.166_666_597_127_914_428_710_938); if t.is_neg_zero() { -0. 
} else { - s.mul_add(u * d, d) + s.mla(u * d, d) } } @@ -72,16 +72,16 @@ pub fn cosf(mut d: f32) -> f32 { if fabsfk(d) < TRIGRANGEMAX2_F { q = 1 + 2 * (rintfk(d * FRAC_1_PI - 0.5) as i32); let qf = q as f32; - d = qf.mul_add(-PI_A2_F * 0.5, d); - d = qf.mul_add(-PI_B2_F * 0.5, d); - d = qf.mul_add(-PI_C2_F * 0.5, d); + d = qf.mla(-PI_A2_F * 0.5, d); + d = qf.mla(-PI_B2_F * 0.5, d); + d = qf.mla(-PI_C2_F * 0.5, d); } else if fabsfk(d) < TRIGRANGEMAX_F { q = 1 + 2 * (rintfk(d * FRAC_1_PI - 0.5) as i32); let qf = q as f32; - d = qf.mul_add(-PI_A_F * 0.5, d); - d = qf.mul_add(-PI_B_F * 0.5, d); - d = qf.mul_add(-PI_C_F * 0.5, d); - d = qf.mul_add(-PI_D_F * 0.5, d); + d = qf.mla(-PI_A_F * 0.5, d); + d = qf.mla(-PI_B_F * 0.5, d); + d = qf.mla(-PI_C_F * 0.5, d); + d = qf.mla(-PI_D_F * 0.5, d); } else { let (mut dfidf, dfii) = rempif(t); q = ((dfii & 3) * 2 + ((dfidf.0 > 0.) as i32) + 7) >> 1; @@ -104,11 +104,11 @@ pub fn cosf(mut d: f32) -> f32 { } let u = 2.608_315_980_978_659_354_150_3_e-6_f32 - .mul_add(s, -0.000_198_106_907_191_686_332_225_8) - .mul_add(s, 0.008_333_078_585_565_090_179_443_36) - .mul_add(s, -0.166_666_597_127_914_428_710_938); + .mla(s, -0.000_198_106_907_191_686_332_225_8) + .mla(s, 0.008_333_078_585_565_090_179_443_36) + .mla(s, -0.166_666_597_127_914_428_710_938); - s.mul_add(u * d, d) + s.mla(u * d, d) } #[test] @@ -130,16 +130,16 @@ pub fn sincosf(d: f32) -> (f32, f32) { if fabsfk(d) < TRIGRANGEMAX2_F { let qf = rintfk(d * FRAC_2_PI); q = qf as i32; - s = qf.mul_add(-PI_A2_F * 0.5, s); - s = qf.mul_add(-PI_B2_F * 0.5, s); - s = qf.mul_add(-PI_C2_F * 0.5, s); + s = qf.mla(-PI_A2_F * 0.5, s); + s = qf.mla(-PI_B2_F * 0.5, s); + s = qf.mla(-PI_C2_F * 0.5, s); } else if fabsfk(d) < TRIGRANGEMAX_F { let qf = rintfk(d * FRAC_2_PI); q = qf as i32; - s = qf.mul_add(-PI_A_F * 0.5, s); - s = qf.mul_add(-PI_B_F * 0.5, s); - s = qf.mul_add(-PI_C_F * 0.5, s); - s = qf.mul_add(-PI_D_F * 0.5, s); + s = qf.mla(-PI_A_F * 0.5, s); + s = qf.mla(-PI_B_F * 0.5, s); + 
s = qf.mla(-PI_C_F * 0.5, s); + s = qf.mla(-PI_D_F * 0.5, s); } else { let (dfidf, dfii) = rempif(d); q = dfii; @@ -154,17 +154,17 @@ pub fn sincosf(d: f32) -> (f32, f32) { s = s * s; let mut u = (-0.000_195_169_282_960_705_459_117_889_f32) - .mul_add(s, 0.008_332_157_507_538_795_471_191_41) - .mul_add(s, -0.166_666_537_523_269_653_320_312); + .mla(s, 0.008_332_157_507_538_795_471_191_41) + .mla(s, -0.166_666_537_523_269_653_320_312); u = u * s * t; let mut rsin = if d.is_neg_zero() { -0. } else { t + u }; u = (-2.718_118_423_672_422_068_193_55_e-7_f32) - .mul_add(s, 2.479_904_469_510_074_704_885_48_e-5) - .mul_add(s, -0.001_388_887_874_782_085_418_701_17) - .mul_add(s, 0.041_666_664_183_139_801_025_390_6) - .mul_add(s, -0.5); + .mla(s, 2.479_904_469_510_074_704_885_48_e-5) + .mla(s, -0.001_388_887_874_782_085_418_701_17) + .mla(s, 0.041_666_664_183_139_801_025_390_6) + .mla(s, -0.5); let mut rcos = u * s + 1.; @@ -206,16 +206,16 @@ pub fn tanf(d: f32) -> f32 { if fabsfk(d) < TRIGRANGEMAX2_F * 0.5 { let qf = rintfk(d * FRAC_2_PI); q = qf as i32; - x = qf.mul_add(-PI_A2_F * 0.5, x); - x = qf.mul_add(-PI_B2_F * 0.5, x); - x = qf.mul_add(-PI_C2_F * 0.5, x); + x = qf.mla(-PI_A2_F * 0.5, x); + x = qf.mla(-PI_B2_F * 0.5, x); + x = qf.mla(-PI_C2_F * 0.5, x); } else if fabsfk(d) < TRIGRANGEMAX_F { let qf = rintfk(d * FRAC_2_PI); q = qf as i32; - x = qf.mul_add(-PI_A_F * 0.5, x); - x = qf.mul_add(-PI_B_F * 0.5, x); - x = qf.mul_add(-PI_C_F * 0.5, x); - x = qf.mul_add(-PI_D_F * 0.5, x); + x = qf.mla(-PI_A_F * 0.5, x); + x = qf.mla(-PI_B_F * 0.5, x); + x = qf.mla(-PI_C_F * 0.5, x); + x = qf.mla(-PI_D_F * 0.5, x); } else { let (dfidf, dfii) = rempif(d); q = dfii; @@ -246,7 +246,7 @@ pub fn tanf(d: f32) -> f32 { 0.333_331_853_151_321_411_132_812, ); - u = s.mul_add(u * x, x); + u = s.mla(u * x, x); if (q & 1) != 0 { 1. 
/ u @@ -276,16 +276,16 @@ pub fn sincospif(d: f32) -> (f32, f32) { let s = s * s; let mut rsin = (-0.360_092_526_5_e-4_f32) - .mul_add(s, 0.249_008_811_1_e-2) - .mul_add(s, -0.807_455_107_6_e-1) - .mul_add(s, 0.785_398_185_3) + .mla(s, 0.249_008_811_1_e-2) + .mla(s, -0.807_455_107_6_e-1) + .mla(s, 0.785_398_185_3) * t; let mut rcos = 0.353_981_522_5_e-5_f32 - .mul_add(s, -0.325_957_400_5_e-3) - .mul_add(s, 0.158_543_158_3_e-1) - .mul_add(s, -0.308_425_128_5) - .mul_add(s, 1.); + .mla(s, -0.325_957_400_5_e-3) + .mla(s, 0.158_543_158_3_e-1) + .mla(s, -0.308_425_128_5) + .mla(s, 1.); if (q & 2) != 0 { core::mem::swap(&mut rcos, &mut rsin); @@ -424,11 +424,11 @@ pub fn asinf(d: f32) -> f32 { let x = if o { fabsfk(d) } else { x2.sqrt() }; let u = 0.419_745_482_5_e-1_f32 - .mul_add(x2, 0.242_404_602_5_e-1) - .mul_add(x2, 0.454_742_386_9_e-1) - .mul_add(x2, 0.749_502_927_1_e-1) - .mul_add(x2, 0.166_667_729_6) - .mul_add(x * x2, x); + .mla(x2, 0.242_404_602_5_e-1) + .mla(x2, 0.454_742_386_9_e-1) + .mla(x2, 0.749_502_927_1_e-1) + .mla(x2, 0.166_667_729_6) + .mla(x * x2, x); let r = if o { u } else { FRAC_PI_2 - 2. * u }; r.mul_sign(d) @@ -450,10 +450,10 @@ pub fn acosf(d: f32) -> f32 { x = if fabsfk(d) == 1. { 0. 
} else { x }; let mut u = 0.419_745_482_5_e-1_f32 - .mul_add(x2, 0.242_404_602_5_e-1) - .mul_add(x2, 0.454_742_386_9_e-1) - .mul_add(x2, 0.749_502_927_1_e-1) - .mul_add(x2, 0.166_667_729_6); + .mla(x2, 0.242_404_602_5_e-1) + .mla(x2, 0.454_742_386_9_e-1) + .mla(x2, 0.749_502_927_1_e-1) + .mla(x2, 0.166_667_729_6); u *= x * x2; @@ -529,8 +529,8 @@ fn expm1kf(d: f32) -> f32 { let qf = rintfk(d * R_LN2_F); let q = qf as i32; - let s = qf.mul_add(-L2U_F, d); - let s = qf.mul_add(-L2L_F, s); + let s = qf.mla(-L2U_F, d); + let s = qf.mla(-L2L_F, s); let s2 = s * s; let s4 = s2 * s2; @@ -650,10 +650,10 @@ pub fn logf(mut d: f32) -> f32 { let x2 = x * x; let t = 0.239_282_846_450_805_664_062_5_f32 - .mul_add(x2, 0.285_182_118_415_832_519_531_25) - .mul_add(x2, 0.400_005_877_017_974_853_515_625) - .mul_add(x2, 0.666_666_686_534_881_591_796_875) - .mul_add(x2, 2.); + .mla(x2, 0.285_182_118_415_832_519_531_25) + .mla(x2, 0.400_005_877_017_974_853_515_625) + .mla(x2, 0.666_666_686_534_881_591_796_875) + .mla(x2, 2.); if d == 0. { f32::NEG_INFINITY @@ -691,10 +691,10 @@ pub fn log2f(mut d: f32) -> f32 { let x2 = x * x; let t = 0.437_408_834_7 - .mul_add(x2, 0.576_484_382_2) - .mul_add(x2, 0.961_802_423); + .mla(x2, 0.576_484_382_2) + .mla(x2, 0.961_802_423); - let r = (x2 * x).mul_add(t, x.mul_add(0.288_539_004_3_e+1, e as f32)); + let r = (x2 * x).mla(t, x.mla(0.288_539_004_3_e+1, e as f32)); if d == 0. 
{ f32::NEG_INFINITY @@ -718,16 +718,16 @@ fn test_log2f() { pub fn exp10f(d: f32) -> f32 { let q = rintfk(d * LOG10_2_F); - let mut s = q.mul_add(-L10U_F, d); - s = q.mul_add(-L10L_F, s); + let mut s = q.mla(-L10U_F, d); + s = q.mla(-L10L_F, s); let mut u = 0.206_400_498_7 - .mul_add(s, 0.541_787_743_6) - .mul_add(s, 0.117_128_682_1_e+1) - .mul_add(s, 0.203_465_604_8_e+1) - .mul_add(s, 0.265_094_876_3_e+1) - .mul_add(s, 0.230_258_512_5_e+1) - .mul_add(s, 0.1_e+1); + .mla(s, 0.541_787_743_6) + .mla(s, 0.117_128_682_1_e+1) + .mla(s, 0.203_465_604_8_e+1) + .mla(s, 0.265_094_876_3_e+1) + .mla(s, 0.230_258_512_5_e+1) + .mla(s, 0.1_e+1); u = ldexp2kf(u, q as i32); @@ -754,12 +754,12 @@ pub fn exp2f(d: f32) -> f32 { let s = d - q; let mut u = 0.153_592_089_2_e-3 - .mul_add(s, 0.133_926_270_1_e-2) - .mul_add(s, 0.961_838_476_4_e-2) - .mul_add(s, 0.555_034_726_9_e-1) - .mul_add(s, 0.240_226_447_6) - .mul_add(s, 0.693_147_182_5) - .mul_add(s, 0.1_e+1); + .mla(s, 0.133_926_270_1_e-2) + .mla(s, 0.961_838_476_4_e-2) + .mla(s, 0.555_034_726_9_e-1) + .mla(s, 0.240_226_447_6) + .mla(s, 0.693_147_182_5) + .mla(s, 0.1_e+1); u = ldexp2kf(u, q as i32); @@ -839,11 +839,11 @@ pub fn cbrtf(mut d: f32) -> f32 { d = fabsfk(d); let x = (-0.601_564_466_953_277_587_890_625_f32) - .mul_add(d, 2.820_889_234_542_846_679_687_5) - .mul_add(d, -5.532_182_216_644_287_109_375) - .mul_add(d, 5.898_262_500_762_939_453_125) - .mul_add(d, -3.809_541_702_270_507_812_5) - .mul_add(d, 2.224_125_623_703_002_929_687_5); + .mla(d, 2.820_889_234_542_846_679_687_5) + .mla(d, -5.532_182_216_644_287_109_375) + .mla(d, 5.898_262_500_762_939_453_125) + .mla(d, -3.809_541_702_270_507_812_5) + .mla(d, 2.224_125_623_703_002_929_687_5); let y = d * x * x; (y - (2. / 3.) * y * (y * x - 1.)) * q diff --git a/src/f32x.rs b/src/f32x.rs index 69ad49a..a54e0fe 100644 --- a/src/f32x.rs +++ b/src/f32x.rs @@ -552,10 +552,10 @@ macro_rules! 
impl_math_f32 { impl MulAdd for F32x { #[inline] - fn mul_add(self, y: Self, z: Self) -> Self { + fn mla(self, y: Self, z: Self) -> Self { if cfg!(target_feature = "fma") { use std::simd::{StdFloat}; - ::mul_add(self, y, z) + self.mul_add(y, z) } else { self * y + z } @@ -567,7 +567,7 @@ macro_rules! impl_math_f32 { fn mul_sub(self, y: Self, z: Self) -> Self { if cfg!(target_feature = "fma") { use std::simd::{StdFloat}; - ::mul_add(self, y, -z) + self.mul_add(y, -z) } else { self * y - z } @@ -579,7 +579,7 @@ macro_rules! impl_math_f32 { fn neg_mul_add(self, y: Self, z: Self) -> Self { if cfg!(target_feature = "fma") { use std::simd::{StdFloat}; - ::mul_add(-self, y, z) + (-self).mul_add(y, z) } else { -self * y + z } @@ -777,12 +777,12 @@ macro_rules! impl_math_f32 { let c = F1_23X.mul_sign(x); let rint4x = (F32x::splat(4.) * x).abs().simd_gt(F1_23X).select( (F32x::splat(4.) * x), - (F32x::splat(4.).mul_add(x, c) - c).or_sign(x) + (F32x::splat(4.).mla(x, c) - c).or_sign(x) ); let rintx = x.abs().simd_gt(F1_23X).select(x, ((x + c) - c).or_sign(x)); - let fr = F32x::splat(-0.25).mul_add(rint4x, x); - let vi = F32x::splat(-4.).mul_add(rintx, rint4x).trunci(); + let fr = F32x::splat(-0.25).mla(rint4x, x); + let vi = F32x::splat(-4.).mla(rintx, rint4x).trunci(); (fr, vi) } } @@ -847,9 +847,9 @@ macro_rules! impl_math_f32 { s += q.cast::() * (-L2L_F); let u = F32x::splat(0.198_096_022_4_e-3) - .mul_add(s.0, F32x::splat(0.139_425_648_4_e-2)) - .mul_add(s.0, F32x::splat(0.833_345_670_3_e-2)) - .mul_add(s.0, F32x::splat(0.416_663_736_1_e-1)); + .mla(s.0, F32x::splat(0.139_425_648_4_e-2)) + .mla(s.0, F32x::splat(0.833_345_670_3_e-2)) + .mla(s.0, F32x::splat(0.416_663_736_1_e-1)); let mut t = s * u + F32x::splat(0.166_666_659_414_234_244_790_680_580_464); t = s * t + HALF; @@ -1206,8 +1206,8 @@ macro_rules! 
impl_math_f32 { let u = o .select_splat(-0.243_061_180_1_e-7, 0.309_384_205_4_e-6) - .mul_add(s, o.select_splat(0.359_057_708_e-5, -0.365_730_738_8_e-4)) - .mul_add(s, o.select_splat(-0.325_991_772_1_e-3, 0.249_039_358_5_e-2)); + .mla(s, o.select_splat(0.359_057_708_e-5, -0.365_730_738_8_e-4)) + .mla(s, o.select_splat(-0.325_991_772_1_e-3, 0.249_039_358_5_e-2)); let mut x = u * s + o.select_doubled( Doubled::new( @@ -1259,8 +1259,8 @@ macro_rules! impl_math_f32 { let u = o .select_splat(-0.243_061_180_1_e-7, 0.309_384_205_4_e-6) - .mul_add(s, o.select_splat(0.359_057_708_e-5, -0.365_730_738_8_e-4)) - .mul_add(s, o.select_splat(-0.325_991_772_1_e-3, 0.249_039_358_5_e-2)); + .mla(s, o.select_splat(0.359_057_708_e-5, -0.365_730_738_8_e-4)) + .mla(s, o.select_splat(-0.325_991_772_1_e-3, 0.249_039_358_5_e-2)); let mut x = u * s + o.select_doubled( Doubled::new( @@ -1299,8 +1299,8 @@ macro_rules! impl_math_f32 { #[inline] fn expm1fk(d: F32x) -> F32x { let q = (d * R_LN2_F).roundi(); - let s = q.cast::().mul_add(-L2U_F, d); - let s = q.cast::().mul_add(-L2L_F, s); + let s = q.cast::().mla(-L2U_F, d); + let s = q.cast::().mla(-L2L_F, s); let s2 = s * s; let s4 = s2 * s2; @@ -1313,7 +1313,7 @@ macro_rules! impl_math_f32 { 0.166_666_671_633_720_397_949_219, 0.5); - let u = (s * s).mul_add(u, s); + let u = (s * s).mla(u, s); q.simd_eq(I32x::splat(0)) .select(u, ldexp2kf(u + ONE, q) - ONE) diff --git a/src/f32x/fast_impl.rs b/src/f32x/fast_impl.rs index 2b7ac6c..1945829 100644 --- a/src/f32x/fast_impl.rs +++ b/src/f32x/fast_impl.rs @@ -11,14 +11,14 @@ macro_rules! 
impl_math_f32_fast { let s = d * FRAC_1_PI; let mut u = s.round(); let q = s.roundi(); - d = u.mul_add(-PI, d); + d = u.mla(-PI, d); let s = d * d; u = F32x::splat(-0.188_174_817_6_e-3) - .mul_add(s, F32x::splat(0.832_350_272_7_e-2)) - .mul_add(s, F32x::splat(-0.166_665_136_8)); - u = (s * d).mul_add(u, d); + .mla(s, F32x::splat(0.832_350_272_7_e-2)) + .mla(s, F32x::splat(-0.166_665_136_8)); + u = (s * d).mla(u, d); u = F32x::from_bits( ((q & I32x::splat(1)).simd_eq(I32x::splat(1)).to_int().cast() & (-ZERO).to_bits()) @@ -50,17 +50,17 @@ macro_rules! impl_math_f32_fast { pub fn cosf(mut d: F32x) -> F32x { let t = d; - let s = d.mul_add(FRAC_1_PI, -HALF); + let s = d.mla(FRAC_1_PI, -HALF); let mut u = s.round(); let q = s.roundi(); - d = u.mul_add(-PI, d - FRAC_PI_2); + d = u.mla(-PI, d - FRAC_PI_2); let s = d * d; u = F32x::splat(-0.188_174_817_6_e-3) - .mul_add(s, F32x::splat(0.832_350_272_7_e-2)) - .mul_add(s, F32x::splat(-0.166_665_136_8)); - u = (s * d).mul_add(u, d); + .mla(s, F32x::splat(0.832_350_272_7_e-2)) + .mla(s, F32x::splat(-0.166_665_136_8)); + u = (s * d).mla(u, d); u = F32x::from_bits( ((q & I32x::splat(1)).simd_eq(I32x::splat(0)).to_int().cast() & (-ZERO).to_bits()) @@ -103,18 +103,18 @@ macro_rules! 
impl_math_f32_fast { let x2 = x * x; let t = F32x::splat(0.239_282_846_450_805_664_062_5) - .mul_add(x2, F32x::splat(0.285_182_118_415_832_519_531_25)) - .mul_add(x2, F32x::splat(0.400_005_877_017_974_853_515_625)) - .mul_add(x2, F32x::splat(0.666_666_686_534_881_591_796_875)) - .mul_add(x2, F32x::splat(2.)); + .mla(x2, F32x::splat(0.285_182_118_415_832_519_531_25)) + .mla(x2, F32x::splat(0.400_005_877_017_974_853_515_625)) + .mla(x2, F32x::splat(0.666_666_686_534_881_591_796_875)) + .mla(x2, F32x::splat(2.)); //if !cfg!(feature = "enable_avx512f") && !cfg!(feature = "enable_avx512fnofma") { - x.mul_add( + x.mla( t, F32x::splat(0.693_147_180_559_945_286_226_764) * e.cast(), ) /* } else { - x.mul_add(t, F32x::splat(0.693_147_180_559_945_286_226_764) * e) + x.mla(t, F32x::splat(0.693_147_180_559_945_286_226_764) * e) }*/ } @@ -122,17 +122,17 @@ macro_rules! impl_math_f32_fast { fn expk3f(d: F32x) -> F32x { let q = (d * R_LN2_F).roundi(); - let mut s = q.cast::().mul_add(-L2U_F, d); - s = q.cast::().mul_add(-L2L_F, s); + let mut s = q.cast::().mla(-L2U_F, d); + s = q.cast::().mla(-L2L_F, s); let mut u = F32x::splat(0.000_198_527_617_612_853_646_278_381) - .mul_add(s, F32x::splat(0.001_393_043_552_525_341_510_772_71)) - .mul_add(s, F32x::splat(0.008_333_360_776_305_198_669_433_59)) - .mul_add(s, F32x::splat(0.041_666_485_369_205_474_853_515_6)) - .mul_add(s, F32x::splat(0.166_666_671_633_720_397_949_219)) - .mul_add(s, HALF); + .mla(s, F32x::splat(0.001_393_043_552_525_341_510_772_71)) + .mla(s, F32x::splat(0.008_333_360_776_305_198_669_433_59)) + .mla(s, F32x::splat(0.041_666_485_369_205_474_853_515_6)) + .mla(s, F32x::splat(0.166_666_671_633_720_397_949_219)) + .mla(s, HALF); - u = (s * s).mul_add(u, s + ONE); + u = (s * s).mla(u, s + ONE); u = ldexp2kf(u, q); F32x::from_bits(!d.simd_lt(F32x::splat(-104.)).to_int().cast::() & u.to_bits()) diff --git a/src/f32x/u05_impl.rs b/src/f32x/u05_impl.rs index db4c175..4e0a91a 100644 --- a/src/f32x/u05_impl.rs +++ 
b/src/f32x/u05_impl.rs @@ -19,8 +19,8 @@ macro_rules! impl_math_f32_u05 { let s2 = t.mul_as_doubled(t); let u = F32x::splat(0.309_384_205_4_e-6) - .mul_add(s, F32x::splat(-0.365_730_738_8_e-4)) - .mul_add(s, F32x::splat(0.249_039_358_5_e-2)); + .mla(s, F32x::splat(-0.365_730_738_8_e-4)) + .mla(s, F32x::splat(0.249_039_358_5_e-2)); let mut x = u * s + Doubled::new( F32x::splat(-0.080_745_510_756_969_451_904), @@ -38,8 +38,8 @@ macro_rules! impl_math_f32_u05 { let rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F32x::splat(-0.243_061_180_1_e-7) - .mul_add(s, F32x::splat(0.359_057_708_e-5)) - .mul_add(s, F32x::splat(-0.325_991_772_1_e-3)); + .mla(s, F32x::splat(0.359_057_708_e-5)) + .mla(s, F32x::splat(-0.325_991_772_1_e-3)); x = u * s + Doubled::new( F32x::splat(0.015_854_343_771_934_509_277), diff --git a/src/f32x/u10_impl.rs b/src/f32x/u10_impl.rs index 64882c8..9ce50fc 100644 --- a/src/f32x/u10_impl.rs +++ b/src/f32x/u10_impl.rs @@ -13,7 +13,7 @@ macro_rules! impl_math_f32_u10 { if d.abs().simd_lt(TRIGRANGEMAX2_F).all() { let u = (d * FRAC_1_PI).round(); q = u.roundi(); - let v = u.mul_add(-PI_A2_F, d); + let v = u.mla(-PI_A2_F, d); s = v.add_as_doubled(u * (-PI_B2_F)); s = s.add_checked(u * (-PI_C2_F)); } else { @@ -39,8 +39,8 @@ macro_rules! impl_math_f32_u10 { let s = s.square(); let mut u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); + .mla(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); let x = ONE.add_checked( F32x::splat(-0.166_666_597_127_914_428_710_938).add_checked_as_doubled(u * s.0) * s, @@ -66,7 +66,7 @@ macro_rules! 
impl_math_f32_u10 { pub fn sinf_deterministic(d: F32x) -> F32x { let u = (d * FRAC_1_PI).round(); let mut q = u.roundi(); - let v = u.mul_add((-PI_A2_F), d); + let v = u.mla((-PI_A2_F), d); let mut s = v.add_as_doubled(u * (-PI_B2_F)); s = s.add_checked(u * (-PI_C2_F)); let g = d.abs().simd_lt(TRIGRANGEMAX2_F); @@ -97,8 +97,8 @@ macro_rules! impl_math_f32_u10 { s = s.square(); let mut u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); + .mla(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); let x = ONE.add_checked( F32x::splat(-0.166_666_597_127_914_428_710_938).add_checked_as_doubled(u * s.0) * s, @@ -140,9 +140,9 @@ macro_rules! impl_math_f32_u10 { let mut s: Doubled; if d.abs().simd_lt(TRIGRANGEMAX2_F).all() { - let dq = (d.mul_add(FRAC_1_PI, F32x::splat(-0.5))) + let dq = (d.mla(FRAC_1_PI, F32x::splat(-0.5))) .round() - .mul_add(F32x::splat(2.), ONE); + .mla(F32x::splat(2.), ONE); q = dq.roundi(); s = d.add_as_doubled(dq * (-PI_A2_F) * HALF); s += dq * (-PI_B2_F) * HALF; @@ -171,8 +171,8 @@ macro_rules! impl_math_f32_u10 { s = s.square(); let u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); + .mla(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); let x = ONE.add_checked( F32x::splat(-0.166_666_597_127_914_428_710_938).add_checked_as_doubled(u * s.0) * s, @@ -194,9 +194,9 @@ macro_rules! 
impl_math_f32_u10 { /// /// NOTE: This version is slower, but SIMD lanes are independent pub fn cosf_deterministic(d: F32x) -> F32x { - let dq = (d.mul_add(FRAC_1_PI, F32x::splat(-0.5))) + let dq = (d.mla(FRAC_1_PI, F32x::splat(-0.5))) .round() - .mul_add(F32x::splat(2.), ONE); + .mla(F32x::splat(2.), ONE); let mut q = dq.roundi(); let mut s = d.add_as_doubled(dq * (-PI_A2_F * HALF)); s += dq * (-PI_B2_F * HALF); @@ -230,8 +230,8 @@ macro_rules! impl_math_f32_u10 { s = s.square(); let u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); + .mla(s.0, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s.0, F32x::splat(0.008_333_078_585_565_090_179_443_36)); let x = ONE.add_checked( F32x::splat(-0.166_666_597_127_914_428_710_938).add_checked_as_doubled(u * s.0) * s, @@ -276,7 +276,7 @@ macro_rules! impl_math_f32_u10 { if d.abs().simd_lt(TRIGRANGEMAX2_F).all() { let u = (d * FRAC_2_PI).round(); q = u.roundi(); - let v = u.mul_add(-PI_A2_F * HALF, d); + let v = u.mla(-PI_A2_F * HALF, d); s = v.add_as_doubled(u * (-PI_B2_F) * HALF); s = s.add_checked(u * (-PI_C2_F) * HALF); } else { @@ -292,8 +292,8 @@ macro_rules! impl_math_f32_u10 { s.0 = s.square_as_f(); let u = F32x::splat(-0.000_195_169_282_960_705_459_117_889) - .mul_add(s.0, F32x::splat(0.008_332_157_507_538_795_471_191_41)) - .mul_add(s.0, F32x::splat(-0.166_666_537_523_269_653_320_312)) + .mla(s.0, F32x::splat(0.008_332_157_507_538_795_471_191_41)) + .mla(s.0, F32x::splat(-0.166_666_537_523_269_653_320_312)) * (s.0 * t.0); let x = t.add_checked(u); @@ -302,10 +302,10 @@ macro_rules! 
impl_math_f32_u10 { let rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F32x::splat(-2.718_118_423_672_422_068_193_55_e-7) - .mul_add(s.0, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) - .mul_add(s.0, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) - .mul_add(s.0, F32x::splat(0.041_666_664_183_139_801_025_390_6)) - .mul_add(s.0, F32x::splat(-0.5)); + .mla(s.0, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) + .mla(s.0, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) + .mla(s.0, F32x::splat(0.041_666_664_183_139_801_025_390_6)) + .mla(s.0, F32x::splat(-0.5)); let x = ONE.add_checked(s.0.mul_as_doubled(u)); let ry = F32x::from(x); @@ -339,7 +339,7 @@ macro_rules! impl_math_f32_u10 { pub fn sincosf_deterministic(d: F32x) -> (F32x, F32x) { let u = (d * FRAC_2_PI).round(); let mut q = u.roundi(); - let v = u.mul_add(-PI_A2_F * HALF, d); + let v = u.mla(-PI_A2_F * HALF, d); let mut s = v.add_as_doubled(u * (-PI_B2_F * HALF)); s = s.add_checked(u * (-PI_C2_F * HALF)); let g = d.abs().simd_lt(TRIGRANGEMAX2_F); @@ -358,8 +358,8 @@ macro_rules! impl_math_f32_u10 { s.0 = s.square_as_f(); let u = F32x::splat(-0.000_195_169_282_960_705_459_117_889) - .mul_add(s.0, F32x::splat(0.008_332_157_507_538_795_471_191_41)) - .mul_add(s.0, F32x::splat(-0.166_666_537_523_269_653_320_312)) + .mla(s.0, F32x::splat(0.008_332_157_507_538_795_471_191_41)) + .mla(s.0, F32x::splat(-0.166_666_537_523_269_653_320_312)) * (s.0 * t.0); let x = t.add_checked(u); @@ -368,10 +368,10 @@ macro_rules! 
impl_math_f32_u10 { rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F32x::splat(-2.718_118_423_672_422_068_193_55_e-7) - .mul_add(s.0, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) - .mul_add(s.0, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) - .mul_add(s.0, F32x::splat(0.041_666_664_183_139_801_025_390_6)) - .mul_add(s.0, F32x::splat(-0.5)); + .mla(s.0, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) + .mla(s.0, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) + .mla(s.0, F32x::splat(0.041_666_664_183_139_801_025_390_6)) + .mla(s.0, F32x::splat(-0.5)); let x = ONE.add_checked(s.0.mul_as_doubled(u)); let ry = F32x::from(x); @@ -425,7 +425,7 @@ macro_rules! impl_math_f32_u10 { let mut s = if d.abs().simd_lt(TRIGRANGEMAX2_F).all() { let u = (d * FRAC_2_PI).round(); q = u.roundi(); - let v = u.mul_add(-PI_A2_F * HALF, d); + let v = u.mla(-PI_A2_F * HALF, d); v.add_as_doubled(u * (-PI_B2_F) * HALF) .add_checked(u * (-PI_C2_F) * HALF) } else { @@ -450,10 +450,10 @@ macro_rules! impl_math_f32_u10 { s = s.normalize(); let u = F32x::splat(0.004_466_364_625_841_379_165_649_41) - .mul_add(s.0, F32x::splat(-8.392_018_207_814_544_439_315_8_e-5)) - .mul_add(s.0, F32x::splat(0.010_963_924_229_145_050_048_828_1)) - .mul_add(s.0, F32x::splat(0.021_236_030_384_898_185_729_980_5)) - .mul_add(s.0, F32x::splat(0.054_068_714_380_264_282_226_562_5)); + .mla(s.0, F32x::splat(-8.392_018_207_814_544_439_315_8_e-5)) + .mla(s.0, F32x::splat(0.010_963_924_229_145_050_048_828_1)) + .mla(s.0, F32x::splat(0.021_236_030_384_898_185_729_980_5)) + .mla(s.0, F32x::splat(0.054_068_714_380_264_282_226_562_5)); let mut x = F32x::splat(0.133_325_666_189_193_725_585_938).add_checked_as_doubled(u * s.0); @@ -477,7 +477,7 @@ macro_rules! 
impl_math_f32_u10 { pub fn tanf_deterministic(d: F32x) -> F32x { let u = (d * FRAC_2_PI).round(); let mut q = u.roundi(); - let v = u.mul_add(-PI_A2_F * HALF, d); + let v = u.mla(-PI_A2_F * HALF, d); let mut s = v.add_as_doubled(u * (-PI_B2_F * HALF)); s = s.add_checked(u * (-PI_C2_F * HALF)); let g = d.abs().simd_lt(TRIGRANGEMAX2_F); @@ -506,10 +506,10 @@ macro_rules! impl_math_f32_u10 { s = s.normalize(); let u = F32x::splat(0.004_466_364_625_841_379_165_649_41) - .mul_add(s.0, F32x::splat(-8.392_018_207_814_544_439_315_8_e-5)) - .mul_add(s.0, F32x::splat(0.010_963_924_229_145_050_048_828_1)) - .mul_add(s.0, F32x::splat(0.021_236_030_384_898_185_729_980_5)) - .mul_add(s.0, F32x::splat(0.054_068_714_380_264_282_226_562_5)); + .mla(s.0, F32x::splat(-8.392_018_207_814_544_439_315_8_e-5)) + .mla(s.0, F32x::splat(0.010_963_924_229_145_050_048_828_1)) + .mla(s.0, F32x::splat(0.021_236_030_384_898_185_729_980_5)) + .mla(s.0, F32x::splat(0.054_068_714_380_264_282_226_562_5)); let mut x = F32x::splat(0.133_325_666_189_193_725_585_938).add_checked_as_doubled(u * s.0); @@ -560,13 +560,13 @@ macro_rules! 
impl_math_f32_u10 { t = t.normalize(); let u = F32x::splat(-0.001_763_979_089_446_365_833_282_47) - .mul_add(t.0, F32x::splat(0.010_790_090_076_625_347_137_451_2)) - .mul_add(t.0, F32x::splat(-0.030_956_460_162_997_245_788_574_2)) - .mul_add(t.0, F32x::splat(0.057_736_508_548_259_735_107_421_9)) - .mul_add(t.0, F32x::splat(-0.083_895_072_340_965_270_996_093_8)) - .mul_add(t.0, F32x::splat(0.109_463_557_600_975_036_621_094)) - .mul_add(t.0, F32x::splat(-0.142_626_821_994_781_494_140_625)) - .mul_add(t.0, F32x::splat(0.199_983_194_470_405_578_613_281)); + .mla(t.0, F32x::splat(0.010_790_090_076_625_347_137_451_2)) + .mla(t.0, F32x::splat(-0.030_956_460_162_997_245_788_574_2)) + .mla(t.0, F32x::splat(0.057_736_508_548_259_735_107_421_9)) + .mla(t.0, F32x::splat(-0.083_895_072_340_965_270_996_093_8)) + .mla(t.0, F32x::splat(0.109_463_557_600_975_036_621_094)) + .mla(t.0, F32x::splat(-0.142_626_821_994_781_494_140_625)) + .mla(t.0, F32x::splat(0.199_983_194_470_405_578_613_281)); t *= F32x::splat(-0.333_332_866_430_282_592_773_438).add_checked_as_doubled(u * t.0); t = s * ONE.add_checked(t); @@ -632,10 +632,10 @@ macro_rules! impl_math_f32_u10 { x = d.abs().simd_eq(ONE).select_doubled(Doubled::from(ZERO), x); let u = F32x::splat(0.419_745_482_5_e-1) - .mul_add(x2, F32x::splat(0.242_404_602_5_e-1)) - .mul_add(x2, F32x::splat(0.454_742_386_9_e-1)) - .mul_add(x2, F32x::splat(0.749_502_927_1_e-1)) - .mul_add(x2, F32x::splat(0.166_667_729_6)) + .mla(x2, F32x::splat(0.242_404_602_5_e-1)) + .mla(x2, F32x::splat(0.454_742_386_9_e-1)) + .mla(x2, F32x::splat(0.749_502_927_1_e-1)) + .mla(x2, F32x::splat(0.166_667_729_6)) * (x2 * x.0); let y = Doubled::new( @@ -671,10 +671,10 @@ macro_rules! 
impl_math_f32_u10 { x = d.abs().simd_eq(ONE).select_doubled(Doubled::from(ZERO), x); let u = F32x::splat(0.419_745_482_5_e-1) - .mul_add(x2, F32x::splat(0.242_404_602_5_e-1)) - .mul_add(x2, F32x::splat(0.454_742_386_9_e-1)) - .mul_add(x2, F32x::splat(0.749_502_927_1_e-1)) - .mul_add(x2, F32x::splat(0.166_667_729_6)) + .mla(x2, F32x::splat(0.242_404_602_5_e-1)) + .mla(x2, F32x::splat(0.454_742_386_9_e-1)) + .mla(x2, F32x::splat(0.749_502_927_1_e-1)) + .mla(x2, F32x::splat(0.166_667_729_6)) * (x2 * x.0); let mut y = Doubled::new( @@ -820,9 +820,9 @@ macro_rules! impl_math_f32_u10 { let x2 = x.square(); let t = F32x::splat(0.239_282_846_450_805_664_062_5) - .mul_add(x2.0, F32x::splat(0.285_182_118_415_832_519_531_25)) - .mul_add(x2.0, F32x::splat(0.400_005_877_017_974_853_515_625)) - .mul_add(x2.0, F32x::splat(0.666_666_686_534_881_591_796_875)); + .mla(x2.0, F32x::splat(0.285_182_118_415_832_519_531_25)) + .mla(x2.0, F32x::splat(0.400_005_877_017_974_853_515_625)) + .mla(x2.0, F32x::splat(0.666_666_686_534_881_591_796_875)); let mut s = Doubled::::splat(crate::f32::D_LN2) * e.cast(); s = s.add_checked(x.scale(F32x::splat(2.))); @@ -946,8 +946,8 @@ macro_rules! impl_math_f32_u10 { let x2 = x.0 * x.0; let t = F32x::splat(0.302_729_487_4) - .mul_add(x2, F32x::splat(0.399_610_817_4)) - .mul_add(x2, F32x::splat(0.666_669_488)); + .mla(x2, F32x::splat(0.399_610_817_4)) + .mla(x2, F32x::splat(0.666_669_488)); s = s.add_checked(x.scale(F32x::splat(2.))); s = s.add_checked(x2 * x.0 * t); @@ -1010,8 +1010,8 @@ macro_rules! impl_math_f32_u10 { let x2 = x.0 * x.0; let t = F32x::splat(0.131_428_986_8) - .mul_add(x2, F32x::splat(0.173_549_354_1)) - .mul_add(x2, F32x::splat(0.289_530_962_7)); + .mla(x2, F32x::splat(0.173_549_354_1)) + .mla(x2, F32x::splat(0.289_530_962_7)); s = s.add_checked(x * Doubled::new( F32x::splat(0.868_588_984), @@ -1071,8 +1071,8 @@ macro_rules! 
impl_math_f32_u10 { let x2 = x.0 * x.0; let t = F32x::splat(0.437_455_028_3) - .mul_add(x2, F32x::splat(0.576_479_017_7)) - .mul_add(x2, F32x::splat(0.961_801_290_512)); + .mla(x2, F32x::splat(0.576_479_017_7)) + .mla(x2, F32x::splat(0.961_801_290_512)); let mut s = ef + x * Doubled::new( F32x::splat(2.885_390_043_258_666_992_2), @@ -1121,14 +1121,14 @@ macro_rules! impl_math_f32_u10 { let dp1 = o.select(dp1 * (F1_32X * F1_32X), dp1); let e = ilogb2kf(dp1 * F32x::splat(1. / 0.75)); let t = ldexp3kf(ONE, -e); - m = d.mul_add(t, t - ONE); + m = d.mla(t, t - ONE); let e = o.select(e - I32x::splat(64), e); Doubled::::splat(crate::f32::D_LN2) * e.cast() }/* else { let e = vgetexp_vf_vf(dp1, F32x::splat(1. / 0.75)); let e = e.simd_eq(INFINITY).select(F32x::splat(128.), e); let t = ldexp3kf(ONE, -e.roundi()); - m = d.mul_add(t, t - ONE); + m = d.mla(t, t - ONE); Doubled::::splat(crate::f32::D_LN2) * e }*/; @@ -1136,8 +1136,8 @@ macro_rules! impl_math_f32_u10 { let x2 = x.0 * x.0; let t = F32x::splat(0.302_729_487_4) - .mul_add(x2, F32x::splat(0.399_610_817_4)) - .mul_add(x2, F32x::splat(0.666_669_488)); + .mla(x2, F32x::splat(0.399_610_817_4)) + .mla(x2, F32x::splat(0.666_669_488)); s = s.add_checked(x.scale(F32x::splat(2.))); s = s.add_checked(x2 * x.0 * t); @@ -1167,17 +1167,17 @@ macro_rules! 
impl_math_f32_u10 { pub fn expf(d: F32x) -> F32x { let q = (d * R_LN2_F).roundi(); - let s = q.cast::().mul_add(-L2U_F, d); - let s = q.cast::().mul_add(-L2L_F, s); + let s = q.cast::().mla(-L2U_F, d); + let s = q.cast::().mla(-L2L_F, s); let mut u = F32x::splat(0.000_198_527_617_612_853_646_278_381) - .mul_add(s, F32x::splat(0.001_393_043_552_525_341_510_772_71)) - .mul_add(s, F32x::splat(0.008_333_360_776_305_198_669_433_59)) - .mul_add(s, F32x::splat(0.041_666_485_369_205_474_853_515_6)) - .mul_add(s, F32x::splat(0.166_666_671_633_720_397_949_219)) - .mul_add(s, HALF); + .mla(s, F32x::splat(0.001_393_043_552_525_341_510_772_71)) + .mla(s, F32x::splat(0.008_333_360_776_305_198_669_433_59)) + .mla(s, F32x::splat(0.041_666_485_369_205_474_853_515_6)) + .mla(s, F32x::splat(0.166_666_671_633_720_397_949_219)) + .mla(s, HALF); - u = ONE + (s * s).mul_add(u, s); + u = ONE + (s * s).mla(u, s); u = ldexp2kf(u, q); @@ -1203,15 +1203,15 @@ macro_rules! impl_math_f32_u10 { let mut u = (d * LOG10_2_F).round(); let q = u.roundi(); - let s = u.mul_add(-L10U_F, d); - let s = u.mul_add(-L10L_F, s); + let s = u.mla(-L10U_F, d); + let s = u.mla(-L10L_F, s); u = F32x::splat(0.680_255_591_9_e-1) - .mul_add(s, F32x::splat(0.207_808_032_6)) - .mul_add(s, F32x::splat(0.539_390_385_2)) - .mul_add(s, F32x::splat(0.117_124_533_7_e+1)) - .mul_add(s, F32x::splat(0.203_467_869_8_e+1)) - .mul_add(s, F32x::splat(0.265_094_900_1_e+1)); + .mla(s, F32x::splat(0.207_808_032_6)) + .mla(s, F32x::splat(0.539_390_385_2)) + .mla(s, F32x::splat(0.117_124_533_7_e+1)) + .mla(s, F32x::splat(0.203_467_869_8_e+1)) + .mla(s, F32x::splat(0.265_094_900_1_e+1)); let x = Doubled::new( F32x::splat(2.302_585_124_969_482_421_9), F32x::splat(-3.170_517_251_649_359_315_7_e-08) @@ -1273,14 +1273,14 @@ macro_rules! 
impl_math_f32_u10 { let s = d - u; u = F32x::splat(0.153_592_089_2_e-3) - .mul_add(s, F32x::splat(0.133_926_270_1_e-2)) - .mul_add(s, F32x::splat(0.961_838_476_4_e-2)) - .mul_add(s, F32x::splat(0.555_034_726_9_e-1)) - .mul_add(s, F32x::splat(0.240_226_447_6)) - .mul_add(s, F32x::splat(0.693_147_182_5)); + .mla(s, F32x::splat(0.133_926_270_1_e-2)) + .mla(s, F32x::splat(0.961_838_476_4_e-2)) + .mla(s, F32x::splat(0.555_034_726_9_e-1)) + .mla(s, F32x::splat(0.240_226_447_6)) + .mla(s, F32x::splat(0.693_147_182_5)); if cfg!(target_feature = "fma") { - u = u.mul_add(s, ONE); + u = u.mla(s, ONE); } else { u = ONE.add_checked(u.mul_as_doubled(s)).normalize().0; } @@ -1324,8 +1324,8 @@ macro_rules! impl_math_f32_u10 { let x2 = x.square(); let t = F32x::splat(0.240_320_354_700_088_500_976_562) - .mul_add(x2.0, F32x::splat(0.285_112_679_004_669_189_453_125)) - .mul_add(x2.0, F32x::splat(0.400_007_992_982_864_379_882_812)); + .mla(x2.0, F32x::splat(0.285_112_679_004_669_189_453_125)) + .mla(x2.0, F32x::splat(0.400_007_992_982_864_379_882_812)); let c = Doubled::new( F32x::splat(0.666_666_626_930_236_816_406_25), F32x::splat(3.691_838_612_596_143_320_843_11_e-9), @@ -1348,10 +1348,10 @@ macro_rules! impl_math_f32_u10 { s = s.normalize(); let mut u = F32x::splat(0.001_363_246_468_827_128_410_339_36) - .mul_add(s.0, F32x::splat(0.008_365_969_173_610_210_418_701_17)) - .mul_add(s.0, F32x::splat(0.041_671_082_377_433_776_855_468_8)) - .mul_add(s.0, F32x::splat(0.166_665_524_244_308_471_679_688)) - .mul_add(s.0, F32x::splat(0.499_999_850_988_388_061_523_438)); + .mla(s.0, F32x::splat(0.008_365_969_173_610_210_418_701_17)) + .mla(s.0, F32x::splat(0.041_671_082_377_433_776_855_468_8)) + .mla(s.0, F32x::splat(0.166_665_524_244_308_471_679_688)) + .mla(s.0, F32x::splat(0.499_999_850_988_388_061_523_438)); let mut t = s.add_checked(s.square() * u); @@ -1464,11 +1464,11 @@ macro_rules! 
impl_math_f32_u10 { d = d.abs(); let mut x = F32x::splat(-0.601_564_466_953_277_587_890_625) - .mul_add(d, F32x::splat(2.820_889_234_542_846_679_687_5)) - .mul_add(d, F32x::splat(-5.532_182_216_644_287_109_375)) - .mul_add(d, F32x::splat(5.898_262_500_762_939_453_125)) - .mul_add(d, F32x::splat(-3.809_541_702_270_507_812_5)) - .mul_add(d, F32x::splat(2.224_125_623_703_002_929_687_5)); + .mla(d, F32x::splat(2.820_889_234_542_846_679_687_5)) + .mla(d, F32x::splat(-5.532_182_216_644_287_109_375)) + .mla(d, F32x::splat(5.898_262_500_762_939_453_125)) + .mla(d, F32x::splat(-3.809_541_702_270_507_812_5)) + .mla(d, F32x::splat(2.224_125_623_703_002_929_687_5)); let mut y = x * x; y = y * y; @@ -1543,7 +1543,7 @@ macro_rules! impl_math_f32_u10 { 0.943_515_777_6, 0.110_248_955_e-3, ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1553,7 +1553,7 @@ macro_rules! impl_math_f32_u10 { 0.816_001_993_4_e-4, ), ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1563,7 +1563,7 @@ macro_rules! impl_math_f32_u10 { 0.152_846_885_6_e-3, ), ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1573,7 +1573,7 @@ macro_rules! impl_math_f32_u10 { -0.235_506_871_8_e-3, ), ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1583,7 +1583,7 @@ macro_rules! impl_math_f32_u10 { 0.496_224_209_2_e-3, ), ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1593,7 +1593,7 @@ macro_rules! impl_math_f32_u10 { -0.119_348_801_7_e-2, ), ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1603,7 +1603,7 @@ macro_rules! impl_math_f32_u10 { 0.289_159_943_3_e-2, ), ) - .mul_add( + .mla( t, F32x::select3( o2, @@ -1613,7 +1613,7 @@ macro_rules! impl_math_f32_u10 { -0.738_545_181_2_e-2, ), ) - .mul_add( + .mla( t, F32x::select3( o2, diff --git a/src/f32x/u15_impl.rs b/src/f32x/u15_impl.rs index bbb0051..8f1a22e 100644 --- a/src/f32x/u15_impl.rs +++ b/src/f32x/u15_impl.rs @@ -25,7 +25,7 @@ macro_rules! impl_math_f32_u15 { -0.386_950_403_5, 0.111_534_416_7_e+1, ) - .mul_add( + .mla( u.0, F32x::select4( o0, @@ -37,7 +37,7 @@ macro_rules! 
impl_math_f32_u15 { -0.945_490_419_9, ), ) - .mul_add( + .mla( u.0, F32x::select4( o0, @@ -49,7 +49,7 @@ macro_rules! impl_math_f32_u15 { -0.366_725_951_4, ), ) - .mul_add( + .mla( u.0, F32x::select4( o0, @@ -61,7 +61,7 @@ macro_rules! impl_math_f32_u15 { 0.715_566_337_1, ), ) - .mul_add( + .mla( u.0, F32x::select4( o0, diff --git a/src/f32x/u35_impl.rs b/src/f32x/u35_impl.rs index 816fd6a..677c83c 100644 --- a/src/f32x/u35_impl.rs +++ b/src/f32x/u35_impl.rs @@ -14,16 +14,16 @@ macro_rules! impl_math_f32_u35 { if d.abs().simd_lt(TRIGRANGEMAX2_F).all() { q = (d * FRAC_1_PI).roundi(); u = q.cast(); - d = u.mul_add(-PI_A2_F, d); - d = u.mul_add(-PI_B2_F, d); - d = u.mul_add(-PI_C2_F, d); + d = u.mla(-PI_A2_F, d); + d = u.mla(-PI_B2_F, d); + d = u.mla(-PI_C2_F, d); } else if d.abs().simd_lt(TRIGRANGEMAX_F).all() { q = (d * FRAC_1_PI).roundi(); u = q.cast(); - d = u.mul_add(-PI_A_F, d); - d = u.mul_add(-PI_B_F, d); - d = u.mul_add(-PI_C_F, d); - d = u.mul_add(-PI_D_F, d); + d = u.mla(-PI_A_F, d); + d = u.mla(-PI_B_F, d); + d = u.mla(-PI_C_F, d); + d = u.mla(-PI_D_F, d); } else { let (mut dfidf, dfii) = rempif(d); q = dfii & I32x::splat(3); @@ -49,9 +49,9 @@ macro_rules! impl_math_f32_u35 { ); let mut u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) - .mul_add(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); + .mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) + .mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); u = s * (u * d) + d; @@ -69,17 +69,17 @@ macro_rules! 
impl_math_f32_u35 { let mut q = (d * FRAC_1_PI).roundi(); let u: F32x = q.cast(); - d = u.mul_add(-PI_A2_F, d); - d = u.mul_add(-PI_B2_F, d); - d = u.mul_add(-PI_C2_F, d); + d = u.mla(-PI_A2_F, d); + d = u.mla(-PI_B2_F, d); + d = u.mla(-PI_C2_F, d); let g = r.abs().simd_lt(TRIGRANGEMAX2_F); if !g.all() { let s: F32x = q.cast(); - let mut u = s.mul_add(-PI_A_F, r); - u = s.mul_add(-PI_B_F, u); - u = s.mul_add(-PI_C_F, u); - u = s.mul_add(-PI_D_F, u); + let mut u = s.mla(-PI_A_F, r); + u = s.mla(-PI_B_F, u); + u = s.mla(-PI_C_F, u); + u = s.mla(-PI_D_F, u); d = g.select(d, u); let g = r.abs().simd_lt(TRIGRANGEMAX_F); @@ -113,9 +113,9 @@ macro_rules! impl_math_f32_u35 { ); let mut u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) - .mul_add(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); + .mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) + .mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); u = s * (u * d) + d; @@ -151,18 +151,18 @@ macro_rules! impl_math_f32_u35 { q = q + q + I32x::splat(1); let u: F32x = q.cast(); - d = u.mul_add(-PI_A2_F * HALF, d); - d = u.mul_add(-PI_B2_F * HALF, d); - d = u.mul_add(-PI_C2_F * HALF, d); + d = u.mla(-PI_A2_F * HALF, d); + d = u.mla(-PI_B2_F * HALF, d); + d = u.mla(-PI_C2_F * HALF, d); } else if d.abs().simd_lt(TRIGRANGEMAX_F).all() { q = (d * FRAC_1_PI - HALF).roundi(); q = q + q + I32x::splat(1); let u: F32x = q.cast(); - d = u.mul_add(-PI_A_F * HALF, d); - d = u.mul_add(-PI_B_F * HALF, d); - d = u.mul_add(-PI_C_F * HALF, d); - d = u.mul_add(-PI_D_F * HALF, d); + d = u.mla(-PI_A_F * HALF, d); + d = u.mla(-PI_B_F * HALF, d); + d = u.mla(-PI_C_F * HALF, d); + d = u.mla(-PI_D_F * HALF, d); } else { let (mut dfidf, dfii) = rempif(d); q = dfii & I32x::splat(3); @@ -189,9 +189,9 @@ macro_rules! 
impl_math_f32_u35 { ); let u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) - .mul_add(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); + .mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) + .mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); s * (u * d) + d } @@ -208,17 +208,17 @@ macro_rules! impl_math_f32_u35 { let mut q = (d * FRAC_1_PI - HALF).roundi(); q = q + q + I32x::splat(1); let u: F32x = q.cast(); - d = u.mul_add(-PI_A2_F * HALF, d); - d = u.mul_add(-PI_B2_F * HALF, d); - d = u.mul_add(-PI_C2_F * HALF, d); + d = u.mla(-PI_A2_F * HALF, d); + d = u.mla(-PI_B2_F * HALF, d); + d = u.mla(-PI_C2_F * HALF, d); let g = r.abs().simd_lt(TRIGRANGEMAX2_F); if !g.all() { let s: F32x = q.cast(); - let mut u = s.mul_add(-PI_A_F * HALF, r); - u = s.mul_add(-PI_B_F * HALF, u); - u = s.mul_add(-PI_C_F * HALF, u); - u = s.mul_add(-PI_D_F * HALF, u); + let mut u = s.mla(-PI_A_F * HALF, r); + u = s.mla(-PI_B_F * HALF, u); + u = s.mla(-PI_C_F * HALF, u); + u = s.mla(-PI_D_F * HALF, u); d = g.select(d, u); let g = r.abs().simd_lt(TRIGRANGEMAX_F); @@ -253,9 +253,9 @@ macro_rules! impl_math_f32_u35 { ); let u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6) - .mul_add(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) - .mul_add(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) - .mul_add(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); + .mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8)) + .mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36)) + .mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938)); s * (u * d) + d } @@ -289,16 +289,16 @@ macro_rules! 
impl_math_f32_u35 { if d.abs().simd_lt(TRIGRANGEMAX2_F).all() { q = (d * FRAC_2_PI).roundi(); let u: F32x = q.cast(); - s = u.mul_add(-PI_A2_F * HALF, s); - s = u.mul_add(-PI_B2_F * HALF, s); - s = u.mul_add(-PI_C2_F * HALF, s); + s = u.mla(-PI_A2_F * HALF, s); + s = u.mla(-PI_B2_F * HALF, s); + s = u.mla(-PI_C2_F * HALF, s); } else if d.abs().simd_lt(TRIGRANGEMAX_F).all() { q = (d * FRAC_2_PI).roundi(); let u: F32x = q.cast(); - s = u.mul_add(-PI_A_F * HALF, s); - s = u.mul_add(-PI_B_F * HALF, s); - s = u.mul_add(-PI_C_F * HALF, s); - s = u.mul_add(-PI_D_F * HALF, s); + s = u.mla(-PI_A_F * HALF, s); + s = u.mla(-PI_B_F * HALF, s); + s = u.mla(-PI_C_F * HALF, s); + s = u.mla(-PI_D_F * HALF, s); } else { let (dfidf, dfii) = rempif(d); q = dfii; @@ -311,19 +311,19 @@ macro_rules! impl_math_f32_u35 { s = s * s; let u = F32x::splat(-0.000_195_169_282_960_705_459_117_889) - .mul_add(s, F32x::splat(0.008_332_157_507_538_795_471_191_41)) - .mul_add(s, F32x::splat(-0.166_666_537_523_269_653_320_312)); + .mla(s, F32x::splat(0.008_332_157_507_538_795_471_191_41)) + .mla(s, F32x::splat(-0.166_666_537_523_269_653_320_312)); - let rx = (u * s).mul_add(t, t); + let rx = (u * s).mla(t, t); let rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F32x::splat(-2.718_118_423_672_422_068_193_55_e-7) - .mul_add(s, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) - .mul_add(s, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) - .mul_add(s, F32x::splat(0.041_666_664_183_139_801_025_390_6)) - .mul_add(s, F32x::splat(-0.5)); + .mla(s, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) + .mla(s, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) + .mla(s, F32x::splat(0.041_666_664_183_139_801_025_390_6)) + .mla(s, F32x::splat(-0.5)); - let ry = s.mul_add(u, ONE); + let ry = s.mla(u, ONE); let o = (q & I32x::splat(1)).simd_eq(I32x::splat(0)); let mut rsin = o.select(rx, ry); @@ -351,18 +351,18 @@ macro_rules! 
impl_math_f32_u35 { pub fn sincosf_deterministic(d: F32x) -> (F32x, F32x) { let mut q = (d * FRAC_2_PI).roundi(); let u: F32x = q.cast(); - let mut s = u.mul_add(-PI_A2_F * HALF, d); - s = u.mul_add(-PI_B2_F * HALF, s); - s = u.mul_add(-PI_C2_F * HALF, s); + let mut s = u.mla(-PI_A2_F * HALF, d); + s = u.mla(-PI_B2_F * HALF, s); + s = u.mla(-PI_C2_F * HALF, s); let g = d.abs().simd_lt(TRIGRANGEMAX2_F); if !g.all() { let q2 = (d * FRAC_2_PI).roundi(); let u: F32x = q2.cast(); - let mut t = u.mul_add(-PI_A_F * HALF, d); - t = u.mul_add(-PI_B_F * HALF, t); - t = u.mul_add(-PI_C_F * HALF, t); - t = u.mul_add(-PI_D_F * HALF, t); + let mut t = u.mla(-PI_A_F * HALF, d); + t = u.mla(-PI_B_F * HALF, t); + t = u.mla(-PI_C_F * HALF, t); + t = u.mla(-PI_D_F * HALF, t); q = g.select(q, q2); s = g.select(s, t); @@ -383,19 +383,19 @@ macro_rules! impl_math_f32_u35 { s = s * s; let u = F32x::splat(-0.000_195_169_282_960_705_459_117_889) - .mul_add(s, F32x::splat(0.008_332_157_507_538_795_471_191_41)) - .mul_add(s, F32x::splat(-0.166_666_537_523_269_653_320_312)); + .mla(s, F32x::splat(0.008_332_157_507_538_795_471_191_41)) + .mla(s, F32x::splat(-0.166_666_537_523_269_653_320_312)); - let mut rx = (u * s).mul_add(t, t); + let mut rx = (u * s).mla(t, t); rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F32x::splat(-2.718_118_423_672_422_068_193_55_e-7) - .mul_add(s, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) - .mul_add(s, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) - .mul_add(s, F32x::splat(0.041_666_664_183_139_801_025_390_6)) - .mul_add(s, F32x::splat(-0.5)); + .mla(s, F32x::splat(2.479_904_469_510_074_704_885_48_e-5)) + .mla(s, F32x::splat(-0.001_388_887_874_782_085_418_701_17)) + .mla(s, F32x::splat(0.041_666_664_183_139_801_025_390_6)) + .mla(s, F32x::splat(-0.5)); - let ry = s.mul_add(u, ONE); + let ry = s.mla(u, ONE); let o = (q & I32x::splat(1)).simd_eq(I32x::splat(0)); let mut rsin = o.select(rx, ry); @@ -446,16 +446,16 @@ macro_rules! 
impl_math_f32_u35 { if d.abs().simd_lt(TRIGRANGEMAX2_F * HALF).all() { q = (d * FRAC_2_PI).roundi(); let u: F32x = q.cast(); - x = u.mul_add(-PI_A2_F * HALF, x); - x = u.mul_add(-PI_B2_F * HALF, x); - x = u.mul_add(-PI_C2_F * HALF, x); + x = u.mla(-PI_A2_F * HALF, x); + x = u.mla(-PI_B2_F * HALF, x); + x = u.mla(-PI_C2_F * HALF, x); } else if d.abs().simd_lt(TRIGRANGEMAX_F).all() { q = (d * (F32x::splat(2.) * FRAC_1_PI)).roundi(); let u: F32x = q.cast(); - x = u.mul_add(-PI_A_F * HALF, x); - x = u.mul_add(-PI_B_F * HALF, x); - x = u.mul_add(-PI_C_F * HALF, x); - x = u.mul_add(-PI_D_F * HALF, x); + x = u.mla(-PI_A_F * HALF, x); + x = u.mla(-PI_B_F * HALF, x); + x = u.mla(-PI_C_F * HALF, x); + x = u.mla(-PI_D_F * HALF, x); } else { let (dfidf, dfii) = rempif(d); q = dfii; @@ -471,11 +471,11 @@ macro_rules! impl_math_f32_u35 { let mut u = if cfg!(feature = "enable_neon32") { F32x::splat(0.009_272_458_031_773_567_199_707_03) - .mul_add(s, F32x::splat(0.003_319_849_958_643_317_222_595_21)) - .mul_add(s, F32x::splat(0.024_299_807_846_546_173_095_703_1)) - .mul_add(s, F32x::splat(0.053_449_530_154_466_629_028_320_3)) - .mul_add(s, F32x::splat(0.133_383_005_857_467_651_367_188)) - .mul_add(s, F32x::splat(0.333_331_853_151_321_411_132_812)) + .mla(s, F32x::splat(0.003_319_849_958_643_317_222_595_21)) + .mla(s, F32x::splat(0.024_299_807_846_546_173_095_703_1)) + .mla(s, F32x::splat(0.053_449_530_154_466_629_028_320_3)) + .mla(s, F32x::splat(0.133_383_005_857_467_651_367_188)) + .mla(s, F32x::splat(0.333_331_853_151_321_411_132_812)) } else { let s2 = s * s; let s4 = s2 * s2; @@ -489,7 +489,7 @@ macro_rules! impl_math_f32_u35 { 0.333_331_853_151_321_411_132_812) }; - u = s.mul_add(u * x, x); + u = s.mla(u * x, x); o.select(u.recip(), u) } @@ -503,18 +503,18 @@ macro_rules! 
impl_math_f32_u35 { pub fn tanf_deterministic(d: F32x) -> F32x { let mut q = (d * FRAC_2_PI).roundi(); let u: F32x = q.cast(); - let mut x = u.mul_add(-PI_A2_F * HALF, d); - x = u.mul_add(-PI_B2_F * HALF, x); - x = u.mul_add(-PI_C2_F * HALF, x); + let mut x = u.mla(-PI_A2_F * HALF, d); + x = u.mla(-PI_B2_F * HALF, x); + x = u.mla(-PI_C2_F * HALF, x); let g = d.abs().simd_lt(TRIGRANGEMAX2_F * HALF); if !g.all() { let q2 = (d * FRAC_2_PI).roundi(); let s: F32x = q.cast(); - let mut u = s.mul_add(-PI_A_F * HALF, d); - u = s.mul_add(-PI_B_F * HALF, u); - u = s.mul_add(-PI_C_F * HALF, u); - u = s.mul_add(-PI_D_F * HALF, u); + let mut u = s.mla(-PI_A_F * HALF, d); + u = s.mla(-PI_B_F * HALF, u); + u = s.mla(-PI_C_F * HALF, u); + u = s.mla(-PI_D_F * HALF, u); q = g.select(q, q2); x = g.select(x, u); @@ -537,11 +537,11 @@ macro_rules! impl_math_f32_u35 { let mut u = if cfg!(feature = "enable_neon32") { F32x::splat(0.009_272_458_031_773_567_199_707_03) - .mul_add(s, F32x::splat(0.003_319_849_958_643_317_222_595_21)) - .mul_add(s, F32x::splat(0.024_299_807_846_546_173_095_703_1)) - .mul_add(s, F32x::splat(0.053_449_530_154_466_629_028_320_3)) - .mul_add(s, F32x::splat(0.133_383_005_857_467_651_367_188)) - .mul_add(s, F32x::splat(0.333_331_853_151_321_411_132_812)) + .mla(s, F32x::splat(0.003_319_849_958_643_317_222_595_21)) + .mla(s, F32x::splat(0.024_299_807_846_546_173_095_703_1)) + .mla(s, F32x::splat(0.053_449_530_154_466_629_028_320_3)) + .mla(s, F32x::splat(0.133_383_005_857_467_651_367_188)) + .mla(s, F32x::splat(0.333_331_853_151_321_411_132_812)) } else { let s2 = s * s; let s4 = s2 * s2; @@ -555,7 +555,7 @@ macro_rules! impl_math_f32_u35 { 0.333_331_853_151_321_411_132_812) }; - u = s.mul_add(u * x, x); + u = s.mla(u * x, x); o.select(u.recip(), u) } @@ -595,19 +595,19 @@ macro_rules! 
impl_math_f32_u35 { // let u = F32x::splat(-0.360_092_526_5_e-4) - .mul_add(s, F32x::splat(0.249_008_811_1_e-2)) - .mul_add(s, F32x::splat(-0.807_455_107_6_e-1)) - .mul_add(s, F32x::splat(0.785_398_185_3)); + .mla(s, F32x::splat(0.249_008_811_1_e-2)) + .mla(s, F32x::splat(-0.807_455_107_6_e-1)) + .mla(s, F32x::splat(0.785_398_185_3)); let rx = u * t; // let u = F32x::splat(0.353_981_522_5_e-5) - .mul_add(s, F32x::splat(-0.325_957_400_5_e-3)) - .mul_add(s, F32x::splat(0.158_543_158_3_e-1)) - .mul_add(s, F32x::splat(-0.308_425_128_5)) - .mul_add(s, ONE); + .mla(s, F32x::splat(-0.325_957_400_5_e-3)) + .mla(s, F32x::splat(0.158_543_158_3_e-1)) + .mla(s, F32x::splat(-0.308_425_128_5)) + .mla(s, ONE); let ry = u; @@ -678,8 +678,8 @@ macro_rules! impl_math_f32_u35 { 0.199_926_957_488_059_997_558_594, -0.333_331_018_686_294_555_664_062); - let t = s.mul_add(t * u, s); - q.cast::().mul_add(FRAC_PI_2, t) + let t = s.mla(t * u, s); + q.cast::().mla(FRAC_PI_2, t) } /// Arc tangent function of two variables @@ -729,13 +729,13 @@ macro_rules! impl_math_f32_u35 { let x = o.select(d.abs(), x2.sqrt()); let u = F32x::splat(0.419_745_482_5_e-1) - .mul_add(x2, F32x::splat(0.242_404_602_5_e-1)) - .mul_add(x2, F32x::splat(0.454_742_386_9_e-1)) - .mul_add(x2, F32x::splat(0.749_502_927_1_e-1)) - .mul_add(x2, F32x::splat(0.166_667_729_6)) - .mul_add(x * x2, x); + .mla(x2, F32x::splat(0.242_404_602_5_e-1)) + .mla(x2, F32x::splat(0.454_742_386_9_e-1)) + .mla(x2, F32x::splat(0.749_502_927_1_e-1)) + .mla(x2, F32x::splat(0.166_667_729_6)) + .mla(x * x2, x); - let r = o.select(u, u.mul_add(F32x::splat(-2.), FRAC_PI_2)); + let r = o.select(u, u.mla(F32x::splat(-2.), FRAC_PI_2)); r.mul_sign(d) } @@ -760,10 +760,10 @@ macro_rules! 
impl_math_f32_u35 { x = d.abs().simd_eq(ONE).select(ZERO, x); let u = F32x::splat(0.419_745_482_5_e-1) - .mul_add(x2, F32x::splat(0.242_404_602_5_e-1)) - .mul_add(x2, F32x::splat(0.454_742_386_9_e-1)) - .mul_add(x2, F32x::splat(0.749_502_927_1_e-1)) - .mul_add(x2, F32x::splat(0.166_667_729_6)) + .mla(x2, F32x::splat(0.242_404_602_5_e-1)) + .mla(x2, F32x::splat(0.454_742_386_9_e-1)) + .mla(x2, F32x::splat(0.749_502_927_1_e-1)) + .mla(x2, F32x::splat(0.166_667_729_6)) * (x2 * x); let y = F32x::splat(core::f32::consts::FRAC_PI_2) - (x.mul_sign(d) + u.mul_sign(d)); @@ -812,7 +812,7 @@ macro_rules! impl_math_f32_u35 { 0.199_926_957_488_059_997_558_594, -0.333_331_018_686_294_555_664_062); - t = s.mul_add(t * u, s); + t = s.mla(t * u, s); t = (q & I32x::splat(1)) .simd_eq(I32x::splat(1)) @@ -877,7 +877,7 @@ macro_rules! impl_math_f32_u35 { /// or a correct value with `3.5 ULP` error bound is returned. pub fn coshf(x: F32x) -> F32x { let e = u10::expf(x.abs()); - let mut y = HALF.mul_add(e, HALF / e); + let mut y = HALF.mla(e, HALF / e); y = (x.abs().simd_gt(F32x::splat(88.)) | y.is_nan()).select(INFINITY, y); F32x::from_bits(x.is_nan().to_int().cast() | y.to_bits()) @@ -943,12 +943,12 @@ macro_rules! 
impl_math_f32_u35 { let x2 = x * x; let t = F32x::splat(0.239_282_846_450_805_664_062_5) - .mul_add(x2, F32x::splat(0.285_182_118_415_832_519_531_25)) - .mul_add(x2, F32x::splat(0.400_005_877_017_974_853_515_625)) - .mul_add(x2, F32x::splat(0.666_666_686_534_881_591_796_875)) - .mul_add(x2, F32x::splat(2.)); + .mla(x2, F32x::splat(0.285_182_118_415_832_519_531_25)) + .mla(x2, F32x::splat(0.400_005_877_017_974_853_515_625)) + .mla(x2, F32x::splat(0.666_666_686_534_881_591_796_875)) + .mla(x2, F32x::splat(2.)); - x = x.mul_add(t, F32x::splat(0.693_147_180_559_945_286_226_764) * ef); + x = x.mla(t, F32x::splat(0.693_147_180_559_945_286_226_764) * ef); /*if !cfg!(feature = "enable_avx512f") && !cfg!(feature = "enable_avx512fnofma") {*/ x = d.simd_eq(INFINITY).select(INFINITY, x); x = (d.simd_lt(ZERO) | d.is_nan()).select(NAN, x); @@ -987,18 +987,18 @@ macro_rules! impl_math_f32_u35 { let x2 = x * x; let t = F32x::splat(0.437_408_834_7) - .mul_add(x2, F32x::splat(0.576_484_382_2)) - .mul_add(x2, F32x::splat(0.961_802_423)); + .mla(x2, F32x::splat(0.576_484_382_2)) + .mla(x2, F32x::splat(0.961_802_423)); //if !cfg!(feature = "enable_avx512f") && !cfg!(feature = "enable_avx512fnofma") { - let mut r = (x2 * x).mul_add(t, x.mul_add(F32x::splat(0.288_539_004_3_e+1), e.cast())); + let mut r = (x2 * x).mla(t, x.mla(F32x::splat(0.288_539_004_3_e+1), e.cast())); r = d.simd_eq(INFINITY).select(INFINITY, r); r = (d.simd_lt(ZERO) | d.is_nan()).select(NAN, r); d.simd_eq(ZERO).select(NEG_INFINITY, r) /*} else { - let r = (x2 * x).mul_add(t, x.mul_add(F32x::splat(0.288_539_004_3_e+1), e)); + let r = (x2 * x).mla(t, x.mla(F32x::splat(0.288_539_004_3_e+1), e)); vfixup_vf_vf_vf_vi2_i(r, d, I32::splat((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0) */ @@ -1022,16 +1022,16 @@ macro_rules! 
impl_math_f32_u35 { let mut u = (d * LOG10_2_F).round(); let q = u.roundi(); - let mut s = u.mul_add(-L10U_F, d); - s = u.mul_add(-L10L_F, s); + let mut s = u.mla(-L10U_F, d); + s = u.mla(-L10L_F, s); u = F32x::splat(0.206_400_498_7) - .mul_add(s, F32x::splat(0.541_787_743_6)) - .mul_add(s, F32x::splat(0.117_128_682_1_e+1)) - .mul_add(s, F32x::splat(0.203_465_604_8_e+1)) - .mul_add(s, F32x::splat(0.265_094_876_3_e+1)) - .mul_add(s, F32x::splat(0.230_258_512_5_e+1)) - .mul_add(s, F32x::splat(0.1_e+1)); + .mla(s, F32x::splat(0.541_787_743_6)) + .mla(s, F32x::splat(0.117_128_682_1_e+1)) + .mla(s, F32x::splat(0.203_465_604_8_e+1)) + .mla(s, F32x::splat(0.265_094_876_3_e+1)) + .mla(s, F32x::splat(0.230_258_512_5_e+1)) + .mla(s, F32x::splat(0.1_e+1)); u = ldexp2kf(u, q); @@ -1061,12 +1061,12 @@ macro_rules! impl_math_f32_u35 { let s = d - u; u = F32x::splat(0.153_592_089_2_e-3) - .mul_add(s, F32x::splat(0.133_926_270_1_e-2)) - .mul_add(s, F32x::splat(0.961_838_476_4_e-2)) - .mul_add(s, F32x::splat(0.555_034_726_9_e-1)) - .mul_add(s, F32x::splat(0.240_226_447_6)) - .mul_add(s, F32x::splat(0.693_147_182_5)) - .mul_add(s, F32x::splat(0.1_e+1)); + .mla(s, F32x::splat(0.133_926_270_1_e-2)) + .mla(s, F32x::splat(0.961_838_476_4_e-2)) + .mla(s, F32x::splat(0.555_034_726_9_e-1)) + .mla(s, F32x::splat(0.240_226_447_6)) + .mla(s, F32x::splat(0.693_147_182_5)) + .mla(s, F32x::splat(0.1_e+1)); u = ldexp2kf(u, q); @@ -1167,14 +1167,14 @@ macro_rules! 
impl_math_f32_u35 { d = d.abs(); let x = F32x::splat(-0.601_564_466_953_277_587_890_625) - .mul_add(d, F32x::splat(2.820_889_234_542_846_679_687_5)) - .mul_add(d, F32x::splat(-5.532_182_216_644_287_109_375)) - .mul_add(d, F32x::splat(5.898_262_500_762_939_453_125)) - .mul_add(d, F32x::splat(-3.809_541_702_270_507_812_5)) - .mul_add(d, F32x::splat(2.224_125_623_703_002_929_687_5)); + .mla(d, F32x::splat(2.820_889_234_542_846_679_687_5)) + .mla(d, F32x::splat(-5.532_182_216_644_287_109_375)) + .mla(d, F32x::splat(5.898_262_500_762_939_453_125)) + .mla(d, F32x::splat(-3.809_541_702_270_507_812_5)) + .mla(d, F32x::splat(2.224_125_623_703_002_929_687_5)); let mut y = d * x * x; - y = (y - F32x::splat(2. / 3.) * y * y.mul_add(x, F32x::splat(-1.))) * q; + y = (y - F32x::splat(2. / 3.) * y * y.mla(x, F32x::splat(-1.))) * q; /*if cfg!(feature = "enable_avx512f") || cfg!(feature = "enable_avx512fnofma") { y = s.is_infinite().select(INFINITY.mul_sign(s), y); @@ -1206,7 +1206,7 @@ macro_rules! impl_math_f32_u35 { let max = x.simd_max(y); let t = min / max; - let mut ret = max * t.mul_add(t, ONE).sqrt(); + let mut ret = max * t.mla(t, ONE).sqrt(); ret = min.simd_eq(ZERO).select(max, ret); ret = (x.is_nan() | y.is_nan()).select(NAN, ret); (x.simd_eq(INFINITY) | y.simd_eq(INFINITY)).select(INFINITY, ret) diff --git a/src/f64.rs b/src/f64.rs index 1d9dfba..67267f8 100644 --- a/src/f64.rs +++ b/src/f64.rs @@ -497,8 +497,12 @@ impl BitsType for f64 { impl MulAdd for f64 { #[inline] - fn mul_add(self, y: Self, z: Self) -> Self { - self * y + z + fn mla(self, y: Self, z: Self) -> Self { + if cfg!(target_feature = "fma") { + self.mul_add(y, z) + } else { + self * y + z + } } } @@ -634,15 +638,15 @@ fn rempisub(x: f64) -> (f64, i32) { let rint4x = if fabsk(4.0 * x) > D1_52 { 4.0 * x } else { - (4.0.mul_add(x, c) - c).or_sign(x) + (4.0.mla(x, c) - c).or_sign(x) }; let rintx = if fabsk(x) > D1_52 { x } else { (x + c - c).or_sign(x) }; - let retd = (-0.25).mul_add(rint4x, x); - let reti = 
(-4_f64).mul_add(rintx, rint4x) as i32; + let retd = (-0.25).mla(rint4x, x); + let reti = (-4_f64).mla(rintx, rint4x) as i32; (retd, reti) } @@ -693,7 +697,7 @@ fn sinpik(d: f64) -> Doubled { } else { -2.024_611_207_851_823_992_958_68_e-14 }) - .mul_add( + .mla( s, if o { -3.897_962_260_629_327_991_640_47_e-13 @@ -701,7 +705,7 @@ fn sinpik(d: f64) -> Doubled { 6.948_218_305_801_794_613_277_84_e-12 }, ) - .mul_add( + .mla( s, if o { 1.150_115_825_399_960_352_669_01_e-10 @@ -709,7 +713,7 @@ fn sinpik(d: f64) -> Doubled { -1.757_247_499_528_531_799_526_64_e-9 }, ) - .mul_add( + .mla( s, if o { -2.461_136_950_104_469_749_535_9_e-8 @@ -717,7 +721,7 @@ fn sinpik(d: f64) -> Doubled { 3.133_616_889_668_683_928_784_22_e-7 }, ) - .mul_add( + .mla( s, if o { 3.590_860_448_590_527_540_050_62_e-6 @@ -725,7 +729,7 @@ fn sinpik(d: f64) -> Doubled { -3.657_620_418_216_155_192_036_1_e-5 }, ) - .mul_add( + .mla( s, if o { -0.000_325_991_886_927_389_905_997_954 @@ -778,15 +782,15 @@ fn expk2(d: Doubled) -> Doubled { let s = d + qf * (-L2_U) + qf * (-L2_L); let u = 0.160_247_221_970_993_207_2_e-9_f64 - .mul_add(s.0, 0.209_225_518_356_315_700_7_e-8) - .mul_add(s.0, 0.250_523_002_378_264_446_5_e-7) - .mul_add(s.0, 0.275_572_480_090_213_530_3_e-6) - .mul_add(s.0, 0.275_573_189_238_604_437_3_e-5) - .mul_add(s.0, 0.248_015_873_560_581_506_5_e-4) - .mul_add(s.0, 0.198_412_698_414_807_185_8_e-3) - .mul_add(s.0, 0.138_888_888_888_676_325_5_e-2) - .mul_add(s.0, 0.833_333_333_333_334_709_5_e-2) - .mul_add(s.0, 0.416_666_666_666_666_990_5_e-1); + .mla(s.0, 0.209_225_518_356_315_700_7_e-8) + .mla(s.0, 0.250_523_002_378_264_446_5_e-7) + .mla(s.0, 0.275_572_480_090_213_530_3_e-6) + .mla(s.0, 0.275_573_189_238_604_437_3_e-5) + .mla(s.0, 0.248_015_873_560_581_506_5_e-4) + .mla(s.0, 0.198_412_698_414_807_185_8_e-3) + .mla(s.0, 0.138_888_888_888_676_325_5_e-2) + .mla(s.0, 0.833_333_333_333_334_709_5_e-2) + .mla(s.0, 0.416_666_666_666_666_990_5_e-1); let mut t = s * u + 0.166_666_666_666_666_657_4; t = 
s * t + 0.5; @@ -1104,7 +1108,7 @@ pub fn fmod(x: f64, y: f64) -> f64 { #[inline] fn trunc_positive(x: f64) -> f64 { - let fr = (-D1_31).mul_add((x * (1. / D1_31)) as i32 as f64, x); + let fr = (-D1_31).mla((x * (1. / D1_31)) as i32 as f64, x); if fabsk(x) >= D1_52 { x } else { diff --git a/src/f64/u05.rs b/src/f64/u05.rs index 1bd1d2c..f0b7462 100644 --- a/src/f64/u05.rs +++ b/src/f64/u05.rs @@ -20,11 +20,11 @@ pub fn sincospi(d: f64) -> (f64, f64) { // let u = (-2.024_611_207_851_823_992_958_68_e-14_f64) - .mul_add(s, 6.948_218_305_801_794_613_277_84_e-12) - .mul_add(s, -1.757_247_499_528_531_799_526_64_e-9) - .mul_add(s, 3.133_616_889_668_683_928_784_22_e-7) - .mul_add(s, -3.657_620_418_216_155_192_036_1_e-5) - .mul_add(s, 0.002_490_394_570_192_718_502_743_56); + .mla(s, 6.948_218_305_801_794_613_277_84_e-12) + .mla(s, -1.757_247_499_528_531_799_526_64_e-9) + .mla(s, 3.133_616_889_668_683_928_784_22_e-7) + .mla(s, -3.657_620_418_216_155_192_036_1_e-5) + .mla(s, 0.002_490_394_570_192_718_502_743_56); let mut x = u * s + Doubled::new( -0.080_745_512_188_280_785_248_473_1, @@ -42,11 +42,11 @@ pub fn sincospi(d: f64) -> (f64, f64) { // let u = 9.944_803_876_268_437_740_902_08_e-16_f64 - .mul_add(s, -3.897_962_260_629_327_991_640_47_e-13) - .mul_add(s, 1.150_115_825_399_960_352_669_01_e-10) - .mul_add(s, -2.461_136_950_104_469_749_535_9_e-8) - .mul_add(s, 3.590_860_448_590_527_540_050_62_e-6) - .mul_add(s, -0.000_325_991_886_927_389_905_997_954); + .mla(s, -3.897_962_260_629_327_991_640_47_e-13) + .mla(s, 1.150_115_825_399_960_352_669_01_e-10) + .mla(s, -2.461_136_950_104_469_749_535_9_e-8) + .mla(s, 3.590_860_448_590_527_540_050_62_e-6) + .mla(s, -0.000_325_991_886_927_389_905_997_954); x = u * s + Doubled::new( 0.015_854_344_243_815_501_891_425_9, @@ -155,7 +155,7 @@ fn cospik(d: f64) -> Doubled { } else { -2.024_611_207_851_823_992_958_68_e-14 }) - .mul_add( + .mla( s, if o { -3.897_962_260_629_327_991_640_47_e-13 @@ -163,7 +163,7 @@ fn cospik(d: f64) -> Doubled { 
6.948_218_305_801_794_613_277_84_e-12 }, ) - .mul_add( + .mla( s, if o { 1.150_115_825_399_960_352_669_01_e-10 @@ -171,7 +171,7 @@ fn cospik(d: f64) -> Doubled { -1.757_247_499_528_531_799_526_64_e-9 }, ) - .mul_add( + .mla( s, if o { -2.461_136_950_104_469_749_535_9_e-8 @@ -179,7 +179,7 @@ fn cospik(d: f64) -> Doubled { 3.133_616_889_668_683_928_784_22_e-7 }, ) - .mul_add( + .mla( s, if o { 3.590_860_448_590_527_540_050_62_e-6 @@ -187,7 +187,7 @@ fn cospik(d: f64) -> Doubled { -3.657_620_418_216_155_192_036_1_e-5 }, ) - .mul_add( + .mla( s, if o { -0.000_325_991_886_927_389_905_997_954 diff --git a/src/f64/u10.rs b/src/f64/u10.rs index 877f4ec..878b52a 100644 --- a/src/f64/u10.rs +++ b/src/f64/u10.rs @@ -13,13 +13,13 @@ pub fn sin(d: f64) -> f64 { if fabsk(d) < TRIGRANGEMAX2 { let qlf = rintk(d * FRAC_1_PI); ql = qlf as isize; - s = qlf.mul_add(-PI_A2, d).add_checked_as_doubled(qlf * -PI_B2); + s = qlf.mla(-PI_A2, d).add_checked_as_doubled(qlf * -PI_B2); } else if fabsk(d) < TRIGRANGEMAX { let dqh = trunck(d * (FRAC_1_PI / D1_24)) * D1_24; - let qlf = rintk(d.mul_add(FRAC_1_PI, -dqh)); + let qlf = rintk(d.mla(FRAC_1_PI, -dqh)); ql = qlf as isize; - s = dqh.mul_add(-PI_A, d).add_checked_as_doubled(qlf * -PI_A); + s = dqh.mla(-PI_A, d).add_checked_as_doubled(qlf * -PI_A); s += dqh * -PI_B; s += qlf * -PI_B; s += dqh * -PI_C; @@ -58,7 +58,7 @@ pub fn sin(d: f64) -> f64 { 2.755_731_921_044_282_247_773_79_e-6, -0.000_198_412_698_412_046_454_654_947, ) - .mul_add(s.0, 0.008_333_333_333_333_180_562_019_22); + .mla(s.0, 0.008_333_333_333_333_180_562_019_22); let x = (1.).add_checked((-0.166_666_666_666_666_657_414_808).add_checked_as_doubled(u * s.0) * s); @@ -90,7 +90,7 @@ pub fn cos(d: f64) -> f64 { let d = fabsk(d); if d < TRIGRANGEMAX2 { - ql = (2_f64).mul_add(rintk(d * FRAC_1_PI - 0.5), 1.) as isize; + ql = (2_f64).mla(rintk(d * FRAC_1_PI - 0.5), 1.) 
as isize; let qlf = ql as f64; s = d .add_as_doubled(qlf * (-PI_A2 * 0.5)) @@ -101,7 +101,7 @@ pub fn cos(d: f64) -> f64 { ql = qlf as isize; dqh *= D1_24; - let u = dqh.mul_add(-PI_A * 0.5, d); + let u = dqh.mla(-PI_A * 0.5, d); s = u.add_as_doubled(qlf * (-PI_A * 0.5)); s += dqh * (-PI_B * 0.5); s += qlf * (-PI_B * 0.5); @@ -140,7 +140,7 @@ pub fn cos(d: f64) -> f64 { 2.755_731_921_044_282_247_773_79_e-6, -0.000_198_412_698_412_046_454_654_947, ) - .mul_add(s.0, 0.008_333_333_333_333_180_562_019_22); + .mla(s.0, 0.008_333_333_333_333_180_562_019_22); let x = (1.).add_checked((-0.166_666_666_666_666_657_414_808).add_checked_as_doubled(u * s.0) * s); @@ -174,7 +174,7 @@ pub fn sincos(d: f64) -> (f64, f64) { let qlf = rintk(d * (FRAC_2_PI)); ql = qlf as isize; s = qlf - .mul_add(-PI_A2 * 0.5, d) + .mla(-PI_A2 * 0.5, d) .add_checked_as_doubled(qlf * (-PI_B2 * 0.5)); } else if fabsk(d) < TRIGRANGEMAX { let dqh = trunck(d * ((FRAC_2_PI) / D1_24)) * D1_24; @@ -182,7 +182,7 @@ pub fn sincos(d: f64) -> (f64, f64) { ql = qlf as isize; s = dqh - .mul_add(-PI_A * 0.5, d) + .mla(-PI_A * 0.5, d) .add_checked_as_doubled(qlf * (-PI_A * 0.5)); s += dqh * (-PI_B * 0.5); s += qlf * (-PI_B * 0.5); @@ -202,11 +202,11 @@ pub fn sincos(d: f64) -> (f64, f64) { s.0 = s.square_as_f(); let u = 1.589_383_072_832_289_373_285_11_e-10_f64 - .mul_add(s.0, -2.505_069_435_025_397_733_493_18_e-8) - .mul_add(s.0, 2.755_731_317_768_463_605_125_47_e-6) - .mul_add(s.0, -0.000_198_412_698_278_911_770_864_914) - .mul_add(s.0, 0.008_333_333_333_319_184_596_174_6) - .mul_add(s.0, -0.166_666_666_666_666_130_709_393) + .mla(s.0, -2.505_069_435_025_397_733_493_18_e-8) + .mla(s.0, 2.755_731_317_768_463_605_125_47_e-6) + .mla(s.0, -0.000_198_412_698_278_911_770_864_914) + .mla(s.0, 0.008_333_333_333_319_184_596_174_6) + .mla(s.0, -0.166_666_666_666_666_130_709_393) * s.0 * t.0; @@ -214,12 +214,12 @@ pub fn sincos(d: f64) -> (f64, f64) { let mut rsin = if d.is_neg_zero() { -0. 
} else { f64::from(x) }; let u = (-1.136_153_502_390_974_295_315_23_e-11_f64) - .mul_add(s.0, 2.087_574_712_070_400_554_793_66_e-9) - .mul_add(s.0, -2.755_731_440_288_475_674_985_67_e-7) - .mul_add(s.0, 2.480_158_728_900_018_673_119_15_e-5) - .mul_add(s.0, -0.001_388_888_888_887_140_192_823_29) - .mul_add(s.0, 0.041_666_666_666_666_551_959_206_2) - .mul_add(s.0, -0.5); + .mla(s.0, 2.087_574_712_070_400_554_793_66_e-9) + .mla(s.0, -2.755_731_440_288_475_674_985_67_e-7) + .mla(s.0, 2.480_158_728_900_018_673_119_15_e-5) + .mla(s.0, -0.001_388_888_888_887_140_192_823_29) + .mla(s.0, 0.041_666_666_666_666_551_959_206_2) + .mla(s.0, -0.5); let x = (1.).add_checked(s.0.mul_as_doubled(u)); let mut rcos = f64::from(x); @@ -262,7 +262,7 @@ pub fn tan(d: f64) -> f64 { let qlf = rintk(d * (2. * FRAC_1_PI)); ql = qlf as isize; s = qlf - .mul_add(-PI_A2 * 0.5, d) + .mla(-PI_A2 * 0.5, d) .add_checked_as_doubled(qlf * (-PI_B2 * 0.5)); } else if fabsk(d) < TRIGRANGEMAX { let dqh = trunck(d * (FRAC_2_PI / D1_24)) * D1_24; @@ -272,7 +272,7 @@ pub fn tan(d: f64) -> f64 { let qlf = ql as f64; s = dqh - .mul_add(-PI_A * 0.5, d) + .mla(-PI_A * 0.5, d) .add_checked_as_doubled(qlf * (-PI_A * 0.5)); s += dqh * (-PI_B * 0.5); s += qlf * (-PI_B * 0.5); @@ -306,7 +306,7 @@ pub fn tan(d: f64) -> f64 { 0.539_682_539_951_727_297_e-1, 0.133_333_333_333_050_058_1, ) - .mul_add(s.0, 0.333_333_333_333_334_369_5); + .mla(s.0, 0.333_333_333_333_334_369_5); let mut x = t.add_checked(s * t * u); @@ -378,10 +378,10 @@ fn atan2k_u1(mut y: Doubled, mut x: Doubled) -> Doubled { 0.076_922_533_029_620_376_865_409_5, -0.090_909_044_277_338_757_478_190_7, ) - .mul_add(t.0, 0.111_111_108_376_896_236_538_123) - .mul_add(t.0, -0.142_857_142_756_268_568_062_339) - .mul_add(t.0, 0.199_999_999_997_977_351_284_817) - .mul_add(t.0, -0.333_333_333_333_317_605_173_818); + .mla(t.0, 0.111_111_108_376_896_236_538_123) + .mla(t.0, -0.142_857_142_756_268_568_062_339) + .mla(t.0, 0.199_999_999_997_977_351_284_817) + .mla(t.0, 
-0.333_333_333_333_317_605_173_818); t *= u; t = s * (1.).add_checked(t); @@ -677,7 +677,7 @@ fn logk2(d: Doubled) -> Doubled { 0.285_714_285_511_134_091_777_308, 0.400_000_000_000_914_013_309_483, ) - .mul_add(x2.0, 0.666_666_666_666_664_853_302_393); + .mla(x2.0, 0.666_666_666_666_664_853_302_393); (D_LN2 * (e as f64)) + x.scale(2.) + x2 * x * t } @@ -968,7 +968,7 @@ pub fn log1p(d: f64) -> f64 { let mut e = ilogb2k(dp1 * (1. / 0.75)); let t = ldexp3k(1., -e); - let m = d.mul_add(t, t - 1.); + let m = d.mla(t, t - 1.); if o { e -= 64; @@ -1023,8 +1023,8 @@ pub fn exp(d: f64) -> f64 { let qf = rintk(d * R_LN2); let q = qf as i32; - let s = qf.mul_add(-L2_U, d); - let s = qf.mul_add(-L2_L, s); + let s = qf.mla(-L2_U, d); + let s = qf.mla(-L2_L, s); let s2 = s * s; let s4 = s2 * s2; @@ -1046,7 +1046,7 @@ pub fn exp(d: f64) -> f64 { 0.041_666_666_666_666_504_759_142_2, 0.166_666_666_666_666_851_703_837, ) - .mul_add(s, 0.5); + .mla(s, 0.5); u = s * s * u + s + 1.; @@ -1071,20 +1071,20 @@ fn test_exp() { pub fn exp10(d: f64) -> f64 { let q = rintk(d * LOG10_2) as i32; let qf = q as f64; - let s = qf.mul_add(-L10_U, d); - let s = qf.mul_add(-L10_L, s); + let s = qf.mla(-L10_U, d); + let s = qf.mla(-L10_L, s); let mut u = 0.241_146_349_833_426_765_2_e-3_f64 - .mul_add(s, 0.115_748_841_521_718_737_5_e-2) - .mul_add(s, 0.501_397_554_678_973_365_9_e-2) - .mul_add(s, 0.195_976_232_072_053_308_e-1) - .mul_add(s, 0.680_893_639_944_678_413_8_e-1) - .mul_add(s, 0.206_995_849_472_267_623_4) - .mul_add(s, 0.539_382_929_205_853_622_9) - .mul_add(s, 0.117_125_514_890_854_165_5_e+1) - .mul_add(s, 0.203_467_859_229_343_295_3_e+1) - .mul_add(s, 0.265_094_905_523_920_587_6_e+1) - .mul_add(s, 0.230_258_509_299_404_590_1_e+1); + .mla(s, 0.115_748_841_521_718_737_5_e-2) + .mla(s, 0.501_397_554_678_973_365_9_e-2) + .mla(s, 0.195_976_232_072_053_308_e-1) + .mla(s, 0.680_893_639_944_678_413_8_e-1) + .mla(s, 0.206_995_849_472_267_623_4) + .mla(s, 0.539_382_929_205_853_622_9) + .mla(s, 
0.117_125_514_890_854_165_5_e+1) + .mla(s, 0.203_467_859_229_343_295_3_e+1) + .mla(s, 0.265_094_905_523_920_587_6_e+1) + .mla(s, 0.230_258_509_299_404_590_1_e+1); u = (1.).add_checked(u.mul_as_doubled(s)).normalize().0; if d > 308.254_715_559_916_71 { @@ -1152,7 +1152,7 @@ pub fn exp2(d: f64) -> f64 { 0.555_041_086_648_204_659_6_e-1, 0.240_226_506_959_101_221_4, ) - .mul_add(s, 0.693_147_180_559_945_286_2); + .mla(s, 0.693_147_180_559_945_286_2); u = (1.).add_checked(u.mul_as_doubled(s)).normalize().0; @@ -1358,11 +1358,11 @@ pub fn cbrt(d: f64) -> f64 { let d = fabsk(d); let mut x = (-0.640_245_898_480_692_909_870_982_f64) - .mul_add(d, 2.961_551_030_200_395_118_185_95) - .mul_add(d, -5.733_530_609_229_478_436_361_66) - .mul_add(d, 6.039_903_689_894_587_479_614_07) - .mul_add(d, -3.858_419_355_104_449_888_216_32) - .mul_add(d, 2.230_727_530_249_660_972_572_2); + .mla(d, 2.961_551_030_200_395_118_185_95) + .mla(d, -5.733_530_609_229_478_436_361_66) + .mla(d, 6.039_903_689_894_587_479_614_07) + .mla(d, -3.858_419_355_104_449_888_216_32) + .mla(d, 2.230_727_530_249_660_972_572_2); let mut y = x * x; y = y * y; @@ -1430,7 +1430,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { } else { 0.707_481_600_086_460_927_9_e-7 }) - .mul_add( + .mla( t, if o2 { 1.120_804_464_289_911_606_838_558_160_000 @@ -1440,7 +1440,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.400_924_433_300_873_044_3_e-6 }, ) - .mul_add( + .mla( t, if o2 { 13.397_985_455_142_589_218_333_060_200_00 @@ -1450,7 +1450,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.104_011_464_162_824_694_6_e-5 }, ) - .mul_add( + .mla( t, if o2 { -0.116_546_276_599_463_200_848_033_357_000 @@ -1460,7 +1460,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.150_834_915_073_332_916_7_e-5 }, ) - .mul_add( + .mla( t, if o2 { -1.391_801_093_265_337_481_495_562_410_000 @@ -1470,7 +1470,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.128_814_307_493_390_102_e-5 }, ) - .mul_add( + .mla( t, if o2 { 
0.015_056_113_040_026_424_412_918_973_400 @@ -1480,7 +1480,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.474_416_774_988_499_393_7_e-6 }, ) - .mul_add( + .mla( t, if o2 { 0.179_540_117_061_234_856_098_844_714_000 @@ -1490,7 +1490,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.655_481_630_654_248_990_2_e-7 }, ) - .mul_add( + .mla( t, if o2 { -0.002_481_743_600_264_997_730_942_489_280 @@ -1500,7 +1500,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.318_925_247_145_259_984_4_e-6 }, ) - .mul_add( + .mla( t, if o2 { -0.029_527_880_945_699_120_504_851_034_100 @@ -1510,7 +1510,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.135_888_382_147_035_537_7_e-6 }, ) - .mul_add( + .mla( t, if o2 { 0.000_540_164_767_892_604_515_196_325_186 @@ -1520,7 +1520,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.434_393_127_715_733_604_e-6 }, ) - .mul_add( + .mla( t, if o2 { 0.006_403_362_833_808_069_794_787_256_200 @@ -1530,7 +1530,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.972_478_589_740_677_955_5_e-6 }, ) - .mul_add( + .mla( t, if o2 { -0.000_162_516_262_783_915_816_896_611_252 @@ -1540,7 +1540,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.203_688_605_722_596_601_1_e-5 }, ) - .mul_add( + .mla( t, if o2 { -0.001_914_438_498_565_477_526_465_972_390 @@ -1550,7 +1550,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.437_336_314_181_972_581_5_e-5 }, ) - .mul_add( + .mla( t, if o2 { 7.204_895_416_020_010_558_983_115_17_e-5 @@ -1560,7 +1560,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.943_995_126_830_400_867_7_e-5 }, ) - .mul_add( + .mla( t, if o2 { 0.000_839_498_720_672_087_279_971_000_786 @@ -1570,7 +1570,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.205_072_703_037_638_980_4_e-4 }, ) - .mul_add( + .mla( t, if o2 { -5.171_790_908_260_592_193_293_944_22_e-5 @@ -1580,7 +1580,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.449_262_018_343_118_401_8_e-4 }, ) - .mul_add( + .mla( t, if o2 { -0.000_592_166_437_353_693_882_857_342_347 @@ -1590,7 +1590,7 @@ fn gammak(a: 
f64) -> (Doubled, Doubled) { 0.994_575_123_607_187_593_1_e-4 }, ) - .mul_add( + .mla( t, if o2 { 6.972_813_758_365_857_774_037_435_39_e-5 @@ -1600,7 +1600,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.223_154_759_903_498_319_6_e-3 }, ) - .mul_add( + .mla( t, if o2 { 0.000_784_039_221_720_066_627_493_314_301 @@ -1610,7 +1610,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.509_669_524_710_196_762_2_e-3 }, ) - .mul_add( + .mla( t, if o2 { -0.000_229_472_093_621_399_176_949_318_732 @@ -1620,7 +1620,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.119_275_391_166_788_697_1_e-2 }, ) - .mul_add( + .mla( t, if o2 { -0.002_681_327_160_493_827_160_473_958_490 @@ -1630,7 +1630,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { 0.289_051_033_074_221_031_e-2 }, ) - .mul_add( + .mla( t, if o2 { 0.003_472_222_222_222_222_222_175_164_840 @@ -1640,7 +1640,7 @@ fn gammak(a: f64) -> (Doubled, Doubled) { -0.738_555_102_867_446_185_8_e-2 }, ) - .mul_add( + .mla( t, if o2 { 0.083_333_333_333_333_333_335_592_087_900 diff --git a/src/f64/u15.rs b/src/f64/u15.rs index 60cfb66..e15862c 100644 --- a/src/f64/u15.rs +++ b/src/f64/u15.rs @@ -30,7 +30,7 @@ pub fn erfc(a: f64) -> f64 { } else { 0.233_424_972_963_870_131_9_e+5 }) - .mul_add( + .mla( u.0, if o0 { -0.216_176_624_757_005_566_9_e-18 @@ -42,7 +42,7 @@ pub fn erfc(a: f64) -> f64 { -0.469_566_104_493_310_776_9_e+5 }, ) - .mul_add( + .mla( u.0, if o0 { 0.469_591_917_330_159_567_e-17 @@ -54,7 +54,7 @@ pub fn erfc(a: f64) -> f64 { 0.317_340_310_874_864_335_3_e+5 }, ) - .mul_add( + .mla( u.0, if o0 { -0.904_914_041_988_800_712_2_e-16 @@ -66,7 +66,7 @@ pub fn erfc(a: f64) -> f64 { 0.324_298_278_695_957_378_7_e+4 }, ) - .mul_add( + .mla( u.0, if o0 { 0.163_401_890_355_741_072_8_e-14 @@ -78,7 +78,7 @@ pub fn erfc(a: f64) -> f64 { -0.201_471_799_976_034_781_1_e+5 }, ) - .mul_add( + .mla( u.0, if o0 { -0.278_348_578_633_345_174_5_e-13 @@ -90,7 +90,7 @@ pub fn erfc(a: f64) -> f64 { 0.155_400_697_096_711_828_6_e+5 }, ) - .mul_add( + .mla( u.0, if 
o0 { 0.446_322_127_678_641_575_2_e-12 @@ -102,7 +102,7 @@ pub fn erfc(a: f64) -> f64 { -0.615_087_419_056_355_429_3_e+4 }, ) - .mul_add( + .mla( u.0, if o0 { -0.671_136_662_285_013_656_3_e-11 @@ -114,7 +114,7 @@ pub fn erfc(a: f64) -> f64 { 0.124_004_776_563_481_573_2_e+4 }, ) - .mul_add( + .mla( u.0, if o0 { 0.942_275_905_023_266_222_3_e-10 @@ -126,7 +126,7 @@ pub fn erfc(a: f64) -> f64 { -0.821_032_547_575_269_973_1_e+2 }, ) - .mul_add( + .mla( u.0, if o0 { -0.122_905_553_010_022_909_8_e-8 @@ -138,7 +138,7 @@ pub fn erfc(a: f64) -> f64 { 0.324_244_388_083_993_087_e+2 }, ) - .mul_add( + .mla( u.0, if o0 { 0.148_071_928_158_508_651_2_e-7 @@ -150,7 +150,7 @@ pub fn erfc(a: f64) -> f64 { -0.292_341_886_383_316_058_6_e+2 }, ) - .mul_add( + .mla( u.0, if o0 { -0.163_658_446_912_339_980_3_e-6 @@ -162,7 +162,7 @@ pub fn erfc(a: f64) -> f64 { 0.345_746_173_281_438_307_1 }, ) - .mul_add( + .mla( u.0, if o0 { 0.164_621_143_658_892_357_5_e-5 @@ -174,7 +174,7 @@ pub fn erfc(a: f64) -> f64 { 0.548_973_015_595_239_299_8_e+1 }, ) - .mul_add( + .mla( u.0, if o0 { -0.149_256_503_584_062_351_1_e-4 @@ -186,7 +186,7 @@ pub fn erfc(a: f64) -> f64 { 0.155_993_413_225_129_413_4_e-2 }, ) - .mul_add( + .mla( u.0, if o0 { 0.120_553_329_817_896_785_1_e-3 @@ -198,7 +198,7 @@ pub fn erfc(a: f64) -> f64 { -0.154_174_156_683_152_063_8_e+1 }, ) - .mul_add( + .mla( u.0, if o0 { -0.854_832_702_345_085_008_1_e-3 @@ -210,7 +210,7 @@ pub fn erfc(a: f64) -> f64 { 0.282_315_223_055_836_418_6_e-5 }, ) - .mul_add( + .mla( u.0, if o0 { 0.522_397_762_544_218_793_2_e-2 @@ -222,7 +222,7 @@ pub fn erfc(a: f64) -> f64 { 0.624_999_918_419_534_283_8 }, ) - .mul_add( + .mla( u.0, if o0 { -0.268_661_706_451_312_522_2_e-1 diff --git a/src/f64/u35.rs b/src/f64/u35.rs index 4f8f88a..e6a230a 100644 --- a/src/f64/u35.rs +++ b/src/f64/u35.rs @@ -12,20 +12,20 @@ pub fn sin(mut d: f64) -> f64 { if fabsk(d) < TRIGRANGEMAX2 { let qlf = rintk(d * FRAC_1_PI); ql = qlf as isize; - d = qlf.mul_add(-PI_A2, d); - d = 
qlf.mul_add(-PI_B2, d); + d = qlf.mla(-PI_A2, d); + d = qlf.mla(-PI_B2, d); } else if fabsk(d) < TRIGRANGEMAX { let dqh = trunck(d * (FRAC_1_PI / D1_24)) * D1_24; - let qlf = rintk(d.mul_add(FRAC_1_PI, -dqh)); + let qlf = rintk(d.mla(FRAC_1_PI, -dqh)); ql = qlf as isize; - d = dqh.mul_add(-PI_A, d); - d = qlf.mul_add(-PI_A, d); - d = dqh.mul_add(-PI_B, d); - d = qlf.mul_add(-PI_B, d); - d = dqh.mul_add(-PI_C, d); - d = qlf.mul_add(-PI_C, d); - d = (dqh + qlf).mul_add(-PI_D, d); + d = dqh.mla(-PI_A, d); + d = qlf.mla(-PI_A, d); + d = dqh.mla(-PI_B, d); + d = qlf.mla(-PI_B, d); + d = dqh.mla(-PI_C, d); + d = qlf.mla(-PI_C, d); + d = (dqh + qlf).mla(-PI_D, d); } else { let (mut ddidd, ddii) = rempi(t); ql = (((ddii & 3) * 2 + ((ddidd.0 > 0.) as i32) + 1) >> 2) as isize; @@ -64,12 +64,12 @@ pub fn sin(mut d: f64) -> f64 { -0.000_198_412_698_412_696_162_806_809, 0.008_333_333_333_333_329_748_238_15, ) - .mul_add(s, -0.166_666_666_666_666_657_414_808); + .mla(s, -0.166_666_666_666_666_657_414_808); if t.is_neg_zero() { t } else { - s.mul_add(u * d, d) + s.mla(u * d, d) } } @@ -87,23 +87,23 @@ pub fn cos(mut d: f64) -> f64 { let ql: isize; if fabsk(d) < TRIGRANGEMAX2 { - let qlf = (2_f64).mul_add(rintk(d * FRAC_1_PI - 0.5), 1.); + let qlf = (2_f64).mla(rintk(d * FRAC_1_PI - 0.5), 1.); ql = qlf as isize; - d = qlf.mul_add(-PI_A2 * 0.5, d); - d = qlf.mul_add(-PI_B2 * 0.5, d); + d = qlf.mla(-PI_A2 * 0.5, d); + d = qlf.mla(-PI_B2 * 0.5, d); } else if fabsk(d) < TRIGRANGEMAX { let mut dqh = trunck(d * (FRAC_1_PI / D1_23) - 0.5 * (FRAC_1_PI / D1_23)); let qlf = 2. 
* rintk(d * FRAC_1_PI - 0.5 - dqh * D1_23) + 1.; ql = qlf as isize; dqh *= D1_24; - d = dqh.mul_add(-PI_A * 0.5, d); - d = qlf.mul_add(-PI_A * 0.5, d); - d = dqh.mul_add(-PI_B * 0.5, d); - d = qlf.mul_add(-PI_B * 0.5, d); - d = dqh.mul_add(-PI_C * 0.5, d); - d = qlf.mul_add(-PI_C * 0.5, d); - d = (dqh + qlf).mul_add(-PI_D * 0.5, d); + d = dqh.mla(-PI_A * 0.5, d); + d = qlf.mla(-PI_A * 0.5, d); + d = dqh.mla(-PI_B * 0.5, d); + d = qlf.mla(-PI_B * 0.5, d); + d = dqh.mla(-PI_C * 0.5, d); + d = qlf.mla(-PI_C * 0.5, d); + d = (dqh + qlf).mla(-PI_D * 0.5, d); } else { let (mut ddidd, ddii) = rempi(t); ql = (((ddii & 3) * 2 + ((ddidd.0 > 0.) as i32) + 7) >> 1) as isize; @@ -142,9 +142,9 @@ pub fn cos(mut d: f64) -> f64 { -0.000_198_412_698_412_696_162_806_809, 0.008_333_333_333_333_329_748_238_15, ) - .mul_add(s, -0.166_666_666_666_666_657_414_808); + .mla(s, -0.166_666_666_666_666_657_414_808); - s.mul_add(u * d, d) + s.mla(u * d, d) } #[test] @@ -166,20 +166,20 @@ pub fn sincos(d: f64) -> (f64, f64) { if fabsk(d) < TRIGRANGEMAX2 { let qlf = rintk(s * FRAC_2_PI); ql = qlf as isize; - s = qlf.mul_add(-PI_A2 * 0.5, s); - s = qlf.mul_add(-PI_B2 * 0.5, s); + s = qlf.mla(-PI_A2 * 0.5, s); + s = qlf.mla(-PI_B2 * 0.5, s); } else if fabsk(d) < TRIGRANGEMAX { let dqh = trunck(d * (FRAC_2_PI / D1_24)) * D1_24; let qlf = rintk(d * FRAC_2_PI - dqh); ql = qlf as isize; - s = dqh.mul_add(-PI_A * 0.5, s); - s = qlf.mul_add(-PI_A * 0.5, s); - s = dqh.mul_add(-PI_B * 0.5, s); - s = qlf.mul_add(-PI_B * 0.5, s); - s = dqh.mul_add(-PI_C * 0.5, s); - s = qlf.mul_add(-PI_C * 0.5, s); - s = (dqh + qlf).mul_add(-PI_D * 0.5, s); + s = dqh.mla(-PI_A * 0.5, s); + s = qlf.mla(-PI_A * 0.5, s); + s = dqh.mla(-PI_B * 0.5, s); + s = qlf.mla(-PI_B * 0.5, s); + s = dqh.mla(-PI_C * 0.5, s); + s = qlf.mla(-PI_C * 0.5, s); + s = (dqh + qlf).mla(-PI_D * 0.5, s); } else { let (ddidd, ddii) = rempi(d); ql = ddii as isize; @@ -194,23 +194,23 @@ pub fn sincos(d: f64) -> (f64, f64) { s = s * s; let u = 
1.589_383_072_832_289_373_285_11_e-10_f64 - .mul_add(s, -2.505_069_435_025_397_733_493_18_e-8) - .mul_add(s, 2.755_731_317_768_463_605_125_47_e-6) - .mul_add(s, -0.000_198_412_698_278_911_770_864_914) - .mul_add(s, 0.008_333_333_333_319_184_596_174_6) - .mul_add(s, -0.166_666_666_666_666_130_709_393) + .mla(s, -2.505_069_435_025_397_733_493_18_e-8) + .mla(s, 2.755_731_317_768_463_605_125_47_e-6) + .mla(s, -0.000_198_412_698_278_911_770_864_914) + .mla(s, 0.008_333_333_333_319_184_596_174_6) + .mla(s, -0.166_666_666_666_666_130_709_393) * s * t; let mut rsin = if d.is_neg_zero() { -0. } else { t + u }; let u = (-1.136_153_502_390_974_295_315_23_e-11_f64) - .mul_add(s, 2.087_574_712_070_400_554_793_66_e-9) - .mul_add(s, -2.755_731_440_288_475_674_985_67_e-7) - .mul_add(s, 2.480_158_728_900_018_673_119_15_e-5) - .mul_add(s, -0.001_388_888_888_887_140_192_823_29) - .mul_add(s, 0.041_666_666_666_666_551_959_206_2) - .mul_add(s, -0.5); + .mla(s, 2.087_574_712_070_400_554_793_66_e-9) + .mla(s, -2.755_731_440_288_475_674_985_67_e-7) + .mla(s, 2.480_158_728_900_018_673_119_15_e-5) + .mla(s, -0.001_388_888_888_887_140_192_823_29) + .mla(s, 0.041_666_666_666_666_551_959_206_2) + .mla(s, -0.5); let mut rcos = u * s + 1.; @@ -250,20 +250,20 @@ pub fn tan(d: f64) -> f64 { if fabsk(d) < TRIGRANGEMAX2 { let qlf = rintk(d * FRAC_2_PI); ql = qlf as isize; - x = qlf.mul_add(-PI_A2 * 0.5, d); - x = qlf.mul_add(-PI_B2 * 0.5, x); + x = qlf.mla(-PI_A2 * 0.5, d); + x = qlf.mla(-PI_B2 * 0.5, x); } else if fabsk(d) < 1e+6 { let dqh = trunck(d * (FRAC_2_PI / D1_24)) * D1_24; let qlf = rintk(d * FRAC_2_PI - dqh); ql = qlf as isize; - x = dqh.mul_add(-PI_A * 0.5, d); - x = qlf.mul_add(-PI_A * 0.5, x); - x = dqh.mul_add(-PI_B * 0.5, x); - x = qlf.mul_add(-PI_B * 0.5, x); - x = dqh.mul_add(-PI_C * 0.5, x); - x = qlf.mul_add(-PI_C * 0.5, x); - x = (dqh + qlf).mul_add(-PI_D * 0.5, x); + x = dqh.mla(-PI_A * 0.5, d); + x = qlf.mla(-PI_A * 0.5, x); + x = dqh.mla(-PI_B * 0.5, x); + x = qlf.mla(-PI_B * 
0.5, x); + x = dqh.mla(-PI_C * 0.5, x); + x = qlf.mla(-PI_C * 0.5, x); + x = (dqh + qlf).mla(-PI_D * 0.5, x); } else { let (ddidd, ddii) = rempi(d); ql = ddii as isize; @@ -292,10 +292,10 @@ pub fn tan(d: f64) -> f64 { 0.539_682_539_951_727_297_e-1, 0.133_333_333_333_050_058_1, ) - .mul_add(s, 0.333_333_333_333_334_369_5); - u = s.mul_add(u * x, x); + .mla(s, 0.333_333_333_333_334_369_5); + u = s.mla(u * x, x); - let mut y = u.mul_add(u, -1.); + let mut y = u.mla(u, -1.); x = -2. * u; if (ql & 1) != 0 { @@ -330,25 +330,25 @@ pub fn sincospi(d: f64) -> (f64, f64) { // let u = 0.688_063_889_476_606_013_6_e-11_f64 - .mul_add(s, -0.175_715_956_454_231_019_9_e-8) - .mul_add(s, 0.313_361_632_725_786_731_1_e-6) - .mul_add(s, -0.365_762_041_638_848_645_2_e-4) - .mul_add(s, 0.249_039_457_018_993_210_3_e-2) - .mul_add(s, -0.807_455_121_882_805_632_e-1) - .mul_add(s, 0.785_398_163_397_448_279); + .mla(s, -0.175_715_956_454_231_019_9_e-8) + .mla(s, 0.313_361_632_725_786_731_1_e-6) + .mla(s, -0.365_762_041_638_848_645_2_e-4) + .mla(s, 0.249_039_457_018_993_210_3_e-2) + .mla(s, -0.807_455_121_882_805_632_e-1) + .mla(s, 0.785_398_163_397_448_279); let mut rsin = u * t; // let u = (-0.386_014_121_368_379_435_2_e-12_f64) - .mul_add(s, 0.115_005_788_802_968_141_5_e-9) - .mul_add(s, -0.246_113_649_300_666_355_3_e-7) - .mul_add(s, 0.359_086_044_662_351_671_3_e-5) - .mul_add(s, -0.325_991_886_926_943_594_2_e-3) - .mul_add(s, 0.158_543_442_438_154_116_9_e-1) - .mul_add(s, -0.308_425_137_534_042_437_3) - .mul_add(s, 1.); + .mla(s, 0.115_005_788_802_968_141_5_e-9) + .mla(s, -0.246_113_649_300_666_355_3_e-7) + .mla(s, 0.359_086_044_662_351_671_3_e-5) + .mla(s, -0.325_991_886_926_943_594_2_e-3) + .mla(s, 0.158_543_442_438_154_116_9_e-1) + .mla(s, -0.308_425_137_534_042_437_3) + .mla(s, 1.); let mut rcos = u; @@ -527,7 +527,7 @@ pub fn asin(d: f64) -> f64 { 0.750_000_000_037_858_161_1_e-1, 0.166_666_666_666_649_754_3, ) - .mul_add(x * x2, x); + .mla(x * x2, x); let r = if o { u } else { 
FRAC_PI_2 - 2. * u }; r.mul_sign(d) @@ -657,8 +657,8 @@ fn test_atan() { fn expm1k(d: f64) -> f64 { let q = rintk(d * R_LN2); - let s = q.mul_add(-L2_U, d); - let s = q.mul_add(-L2_L, s); + let s = q.mla(-L2_U, d); + let s = q.mla(-L2_L, s); let s2 = s * s; let s4 = s2 * s2; @@ -681,7 +681,7 @@ fn expm1k(d: f64) -> f64 { 0.166_666_666_666_666_851_703_837, ); - u = s2.mul_add(0.5, s2 * s * u) + s; + u = s2.mla(0.5, s2 * s * u) + s; let q = q as i32; if q != 0 { @@ -838,15 +838,15 @@ pub fn log2(mut d: f64) -> f64 { let x2 = x * x; let t = 0.221_194_175_045_608_149 - .mul_add(x2, 0.220_076_869_315_227_768_9) - .mul_add(x2, 0.262_370_805_748_851_465_6) - .mul_add(x2, 0.320_597_747_794_449_550_2) - .mul_add(x2, 0.412_198_594_548_532_470_9) - .mul_add(x2, 0.577_078_016_299_705_898_2) - .mul_add(x2, 0.961_796_693_926_080_914_49); + .mla(x2, 0.220_076_869_315_227_768_9) + .mla(x2, 0.262_370_805_748_851_465_6) + .mla(x2, 0.320_597_747_794_449_550_2) + .mla(x2, 0.412_198_594_548_532_470_9) + .mla(x2, 0.577_078_016_299_705_898_2) + .mla(x2, 0.961_796_693_926_080_914_49); let s = (e as f64).add_checked((2.885_390_081_777_926_774).mul_as_doubled(x)); - let r = t.mul_add(x * x2, f64::from(s)); + let r = t.mla(x * x2, f64::from(s)); if d == 0. 
{ f64::NEG_INFINITY @@ -870,21 +870,21 @@ fn test_log2() { pub fn exp10(d: f64) -> f64 { let q = rintk(d * LOG10_2); - let mut s = q.mul_add(-L10_U, d); - s = q.mul_add(-L10_L, s); + let mut s = q.mla(-L10_U, d); + s = q.mla(-L10_L, s); let mut u = 0.241_146_349_833_426_765_2_e-3 - .mul_add(s, 0.115_748_841_521_718_737_5_e-2) - .mul_add(s, 0.501_397_554_678_973_365_9_e-2) - .mul_add(s, 0.195_976_232_072_053_308_e-1) - .mul_add(s, 0.680_893_639_944_678_413_8_e-1) - .mul_add(s, 0.206_995_849_472_267_623_4) - .mul_add(s, 0.539_382_929_205_853_622_9) - .mul_add(s, 0.117_125_514_890_854_165_5_e+1) - .mul_add(s, 0.203_467_859_229_343_295_3_e+1) - .mul_add(s, 0.265_094_905_523_920_587_6_e+1) - .mul_add(s, 0.230_258_509_299_404_590_1_e+1) - .mul_add(s, 0.1_e+1); + .mla(s, 0.115_748_841_521_718_737_5_e-2) + .mla(s, 0.501_397_554_678_973_365_9_e-2) + .mla(s, 0.195_976_232_072_053_308_e-1) + .mla(s, 0.680_893_639_944_678_413_8_e-1) + .mla(s, 0.206_995_849_472_267_623_4) + .mla(s, 0.539_382_929_205_853_622_9) + .mla(s, 0.117_125_514_890_854_165_5_e+1) + .mla(s, 0.203_467_859_229_343_295_3_e+1) + .mla(s, 0.265_094_905_523_920_587_6_e+1) + .mla(s, 0.230_258_509_299_404_590_1_e+1) + .mla(s, 0.1_e+1); u = ldexp2k(u, q as i32); @@ -911,17 +911,17 @@ pub fn exp2(d: f64) -> f64 { let s = d - q; let mut u = 0.443_435_908_292_652_945_4_e-9 - .mul_add(s, 0.707_316_459_808_570_742_5_e-8) - .mul_add(s, 0.101_781_926_092_176_045_1_e-6) - .mul_add(s, 0.132_154_387_251_132_761_5_e-5) - .mul_add(s, 0.152_527_335_351_758_473_e-4) - .mul_add(s, 0.154_035_304_510_114_780_8_e-3) - .mul_add(s, 0.133_335_581_467_049_907_3_e-2) - .mul_add(s, 0.961_812_910_759_760_053_6_e-2) - .mul_add(s, 0.555_041_086_648_204_659_6_e-1) - .mul_add(s, 0.240_226_506_959_101_221_4) - .mul_add(s, 0.693_147_180_559_945_286_2) - .mul_add(s, 0.1_e+1); + .mla(s, 0.707_316_459_808_570_742_5_e-8) + .mla(s, 0.101_781_926_092_176_045_1_e-6) + .mla(s, 0.132_154_387_251_132_761_5_e-5) + .mla(s, 0.152_527_335_351_758_473_e-4) + 
.mla(s, 0.154_035_304_510_114_780_8_e-3) + .mla(s, 0.133_335_581_467_049_907_3_e-2) + .mla(s, 0.961_812_910_759_760_053_6_e-2) + .mla(s, 0.555_041_086_648_204_659_6_e-1) + .mla(s, 0.240_226_506_959_101_221_4) + .mla(s, 0.693_147_180_559_945_286_2) + .mla(s, 0.1_e+1); u = ldexp2k(u, q as i32); @@ -972,11 +972,11 @@ pub fn cbrt(mut d: f64) -> f64 { d = fabsk(d); let mut x = (-0.640_245_898_480_692_909_870_982_f64) - .mul_add(d, 2.961_551_030_200_395_118_185_95) - .mul_add(d, -5.733_530_609_229_478_436_361_66) - .mul_add(d, 6.039_903_689_894_587_479_614_07) - .mul_add(d, -3.858_419_355_104_449_888_216_32) - .mul_add(d, 2.230_727_530_249_660_972_572_2); + .mla(d, 2.961_551_030_200_395_118_185_95) + .mla(d, -5.733_530_609_229_478_436_361_66) + .mla(d, 6.039_903_689_894_587_479_614_07) + .mla(d, -3.858_419_355_104_449_888_216_32) + .mla(d, 2.230_727_530_249_660_972_572_2); let mut y = x * x; y = y * y; diff --git a/src/f64x.rs b/src/f64x.rs index 9e07170..4736016 100644 --- a/src/f64x.rs +++ b/src/f64x.rs @@ -525,10 +525,10 @@ macro_rules! impl_math_f64 { impl MulAdd for F64x { #[inline] - fn mul_add(self, y: Self, z: Self) -> Self { + fn mla(self, y: Self, z: Self) -> Self { if cfg!(target_feature = "fma") { use std::simd::{StdFloat}; - ::mul_add(self, y, z) + self.mul_add(y, z) } else { self * y + z } @@ -540,7 +540,7 @@ macro_rules! impl_math_f64 { fn mul_sub(self, y: Self, z: Self) -> Self { if cfg!(target_feature = "fma") { use std::simd::{StdFloat}; - ::mul_add(self, y, -z) + self.mul_add(y, -z) } else { self * y - z } @@ -552,7 +552,7 @@ macro_rules! impl_math_f64 { fn neg_mul_add(self, y: Self, z: Self) -> Self { if cfg!(target_feature = "fma") { use std::simd::{StdFloat}; - ::mul_add(-self, y, z) + (-self).mul_add(y, z) } else { -self * y + z } @@ -657,7 +657,7 @@ macro_rules! 
impl_math_f64 { self.trunc().simd_eq(self) } else { let mut x = (self * (ONE / D1_31X)).trunc(); - x = (-D1_31X).mul_add(x, self); + x = (-D1_31X).mla(x, self); x.trunc().simd_eq(x) | self.abs().simd_gt(D1_53X) } } @@ -723,7 +723,7 @@ macro_rules! impl_math_f64 { x.trunc().simd_ne(x) } else { let mut x = (self * (ONE / D1_31X)).trunc(); - x = (-D1_31X).mul_add(x, self); + x = (-D1_31X).mla(x, self); (x.trunci() & Ix::splat(1)).simd_eq(Ix::splat(1)).cast::() & self.abs().simd_lt(D1_53X) } @@ -769,12 +769,12 @@ macro_rules! impl_math_f64 { let c = D1_52X.mul_sign(x); let rint4x = (F64x::splat(4.) * x).abs().simd_gt(D1_52X).select( (F64x::splat(4.) * x), - (F64x::splat(4.).mul_add(x, c) - c).or_sign(x) + (F64x::splat(4.).mla(x, c) - c).or_sign(x) ); let rintx = x.abs().simd_gt(D1_52X).select(x, ((x + c) - c).or_sign(x)); - let fr = F64x::splat(-0.25).mul_add(rint4x, x); - let vi = F64x::splat(-4.).mul_add(rintx, rint4x).trunci(); + let fr = F64x::splat(-0.25).mla(rint4x, x); + let vi = F64x::splat(-4.).mla(rintx, rint4x).trunci(); (fr, vi) } } @@ -1141,7 +1141,7 @@ macro_rules! impl_math_f64 { #[cfg(not(feature = "full_fp_rounding"))] #[inline] fn trunc_positive(x: F64x) -> F64x { - let mut fr = (-D1_31X).mul_add((x * (ONE / D1_31X)).trunci().cast(), x); + let mut fr = (-D1_31X).mla((x * (ONE / D1_31X)).trunci().cast(), x); fr -= fr.trunci().cast(); x.abs().simd_ge(D1_52X).select(x, x - fr) } @@ -1256,35 +1256,35 @@ macro_rules! 
impl_math_f64 { 9.944_803_876_268_437_740_902_08_e-16, -2.024_611_207_851_823_992_958_68_e-14, ) - .mul_add( + .mla( s, o.select_splat( -3.897_962_260_629_327_991_640_47_e-13, 6.948_218_305_801_794_613_277_84_e-12, ), ) - .mul_add( + .mla( s, o.select_splat( 1.150_115_825_399_960_352_669_01_e-10, -1.757_247_499_528_531_799_526_64_e-9, ), ) - .mul_add( + .mla( s, o.select_splat( -2.461_136_950_104_469_749_535_9_e-8, 3.133_616_889_668_683_928_784_22_e-7, ), ) - .mul_add( + .mla( s, o.select_splat( 3.590_860_448_590_527_540_050_62_e-6, -3.657_620_418_216_155_192_036_1_e-5, ), ) - .mul_add( + .mla( s, o.select_splat( -0.000_325_991_886_927_389_905_997_954, diff --git a/src/f64x/u05_impl.rs b/src/f64x/u05_impl.rs index aa439db..5fab595 100644 --- a/src/f64x/u05_impl.rs +++ b/src/f64x/u05_impl.rs @@ -21,11 +21,11 @@ macro_rules! impl_math_f64_u05 { // let u = F64x::splat(-2.024_611_207_851_823_992_958_68_e-14) - .mul_add(s, F64x::splat(6.948_218_305_801_794_613_277_84_e-12)) - .mul_add(s, F64x::splat(-1.757_247_499_528_531_799_526_64_e-9)) - .mul_add(s, F64x::splat(3.133_616_889_668_683_928_784_22_e-7)) - .mul_add(s, F64x::splat(-3.657_620_418_216_155_192_036_1_e-5)) - .mul_add(s, F64x::splat(0.002_490_394_570_192_718_502_743_56)); + .mla(s, F64x::splat(6.948_218_305_801_794_613_277_84_e-12)) + .mla(s, F64x::splat(-1.757_247_499_528_531_799_526_64_e-9)) + .mla(s, F64x::splat(3.133_616_889_668_683_928_784_22_e-7)) + .mla(s, F64x::splat(-3.657_620_418_216_155_192_036_1_e-5)) + .mla(s, F64x::splat(0.002_490_394_570_192_718_502_743_56)); let mut x = u * s + Doubled::new( F64x::splat(-0.080_745_512_188_280_785_248_473_1), @@ -45,11 +45,11 @@ macro_rules! 
impl_math_f64_u05 { // let u = F64x::splat(9.944_803_876_268_437_740_902_08_e-16) - .mul_add(s, F64x::splat(-3.897_962_260_629_327_991_640_47_e-13)) - .mul_add(s, F64x::splat(1.150_115_825_399_960_352_669_01_e-10)) - .mul_add(s, F64x::splat(-2.461_136_950_104_469_749_535_9_e-8)) - .mul_add(s, F64x::splat(3.590_860_448_590_527_540_050_62_e-6)) - .mul_add(s, F64x::splat(-0.000_325_991_886_927_389_905_997_954)); + .mla(s, F64x::splat(-3.897_962_260_629_327_991_640_47_e-13)) + .mla(s, F64x::splat(1.150_115_825_399_960_352_669_01_e-10)) + .mla(s, F64x::splat(-2.461_136_950_104_469_749_535_9_e-8)) + .mla(s, F64x::splat(3.590_860_448_590_527_540_050_62_e-6)) + .mla(s, F64x::splat(-0.000_325_991_886_927_389_905_997_954)); let mut x = u * s + Doubled::new( F64x::splat(0.015_854_344_243_815_501_891_425_9), @@ -160,35 +160,35 @@ macro_rules! impl_math_f64_u05 { 9.944_803_876_268_437_740_902_08_e-16, -2.024_611_207_851_823_992_958_68_e-14, ) - .mul_add( + .mla( s, o.select_splat( -3.897_962_260_629_327_991_640_47_e-13, 6.948_218_305_801_794_613_277_84_e-12, ), ) - .mul_add( + .mla( s, o.select_splat( 1.150_115_825_399_960_352_669_01_e-10, -1.757_247_499_528_531_799_526_64_e-9, ), ) - .mul_add( + .mla( s, o.select_splat( -2.461_136_950_104_469_749_535_9_e-8, 3.133_616_889_668_683_928_784_22_e-7, ), ) - .mul_add( + .mla( s, o.select_splat( 3.590_860_448_590_527_540_050_62_e-6, -3.657_620_418_216_155_192_036_1_e-5, ), ) - .mul_add( + .mla( s, o.select_splat( -0.000_325_991_886_927_389_905_997_954, diff --git a/src/f64x/u10_impl.rs b/src/f64x/u10_impl.rs index 56316fc..cb654a6 100644 --- a/src/f64x/u10_impl.rs +++ b/src/f64x/u10_impl.rs @@ -13,7 +13,7 @@ macro_rules! 
impl_math_f64_u10 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = (d * FRAC_1_PI).round(); ql = dql.roundi(); - let u = dql.mul_add(-PI_A2, d); + let u = dql.mla(-PI_A2, d); s = u.add_checked_as_doubled(dql * (-PI_B2)); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = (d * (FRAC_1_PI / D1_24X)).trunc(); @@ -21,7 +21,7 @@ macro_rules! impl_math_f64_u10 { let dql = (d.mul_sub(FRAC_1_PI, dqh)).round(); ql = dql.roundi(); - let u = dqh.mul_add(-PI_A, d); + let u = dqh.mla(-PI_A, d); s = u.add_checked_as_doubled(dql * (-PI_A)); s += dqh * (-PI_B); s += dql * (-PI_B); @@ -59,7 +59,7 @@ macro_rules! impl_math_f64_u10 { -2.505_210_681_484_312_335_936_8_e-8, 2.755_731_921_044_282_247_773_79_e-6, -0.000_198_412_698_412_046_454_654_947) - .mul_add(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); + .mla(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); let x = ONE.add_checked( (F64x::splat(-0.166_666_666_666_666_657_414_808).add_checked_as_doubled(u * s.0)) * s, @@ -88,7 +88,7 @@ macro_rules! impl_math_f64_u10 { let g = d.abs().simd_lt(TRIGRANGEMAX2); let dql = (d * FRAC_1_PI).round(); ql = dql.roundi(); - let u = dql.mul_add(-PI_A2, d); + let u = dql.mla(-PI_A2, d); let mut x = u.add_checked_as_doubled(dql * (-PI_B2)); if !g.all() { @@ -96,7 +96,7 @@ macro_rules! impl_math_f64_u10 { dqh *= D1_24X; let dql = d.mul_sub(FRAC_1_PI, dqh).round(); - let u = dqh.mul_add(-PI_A, d); + let u = dqh.mla(-PI_A, d); s = u.add_checked_as_doubled(dql * (-PI_A)); s += dqh * (-PI_B); s += dql * (-PI_B); @@ -142,7 +142,7 @@ macro_rules! impl_math_f64_u10 { -2.505_210_681_484_312_335_936_8_e-8, 2.755_731_921_044_282_247_773_79_e-6, -0.000_198_412_698_412_046_454_654_947) - .mul_add(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); + .mla(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); x = ONE.add_checked( F64x::splat(-0.166_666_666_666_666_657_414_808).add_checked_as_doubled(u * s.0) * s, @@ -184,21 +184,21 @@ macro_rules! 
impl_math_f64_u10 { let mut ql; if d.abs().simd_lt(TRIGRANGEMAX2).all() { - let dql = d.mul_add(FRAC_1_PI, F64x::splat(-0.5)).round(); - let dql = F64x::splat(2.).mul_add(dql, ONE); + let dql = d.mla(FRAC_1_PI, F64x::splat(-0.5)).round(); + let dql = F64x::splat(2.).mla(dql, ONE); ql = dql.roundi(); s = d.add_as_doubled(dql * (-PI_A2) * HALF); s = s.add_checked(dql * (-PI_B2) * HALF); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = d - .mul_add(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X) + .mla(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X) .trunc(); - ql = (d * FRAC_1_PI + dqh.mul_add(-D1_23X, F64x::splat(-0.5))).roundi(); + ql = (d * FRAC_1_PI + dqh.mla(-D1_23X, F64x::splat(-0.5))).roundi(); let dqh = dqh * D1_24X; ql = ql + ql + Ix::splat(1); let dql: F64x = ql.cast(); - let u = dqh.mul_add(-PI_A * HALF, d); + let u = dqh.mla(-PI_A * HALF, d); s = u.add_as_doubled(dql * -PI_A * HALF); s += dqh * (-PI_B) * HALF; s += dql * (-PI_B) * HALF; @@ -237,7 +237,7 @@ macro_rules! impl_math_f64_u10 { -2.505_210_681_484_312_335_936_8_e-8, 2.755_731_921_044_282_247_773_79_e-6, -0.000_198_412_698_412_046_454_654_947) - .mul_add(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); + .mla(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); let x = ONE.add_checked( (F64x::splat(-0.166_666_666_666_666_657_414_808).add_checked_as_doubled(u * s.0)) * s, @@ -260,20 +260,20 @@ macro_rules! 
impl_math_f64_u10 { /// NOTE: This version is slower, but SIMD lanes are independent pub fn cos_deterministic(d: F64x) -> F64x { let g = d.abs().simd_lt(TRIGRANGEMAX2); - let mut dql = d.mul_add(FRAC_1_PI, F64x::splat(-0.5)).round(); - dql = F64x::splat(2.).mul_add(dql, ONE); + let mut dql = d.mla(FRAC_1_PI, F64x::splat(-0.5)).round(); + dql = F64x::splat(2.).mla(dql, ONE); let mut ql = dql.roundi(); let mut x = d.add_as_doubled(dql * (-PI_A2 * HALF)); x = x.add_checked(dql * (-PI_B2 * HALF)); if !g.all() { - let mut dqh = (d.mul_add(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X)).trunc(); - let mut ql2 = (d * FRAC_1_PI + dqh.mul_add(-D1_23X, F64x::splat(-0.5))).roundi(); + let mut dqh = (d.mla(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X)).trunc(); + let mut ql2 = (d * FRAC_1_PI + dqh.mla(-D1_23X, F64x::splat(-0.5))).roundi(); dqh *= D1_24X; ql2 = ql2 + ql2 + Ix::splat(1); let dql: F64x = ql2.cast(); - let u = dqh.mul_add(-PI_A * HALF, d); + let u = dqh.mla(-PI_A * HALF, d); let mut s = u.add_as_doubled(dql * (-PI_A * HALF)); s += dqh * (-PI_B * HALF); s += dql * (-PI_B * HALF); @@ -320,7 +320,7 @@ macro_rules! impl_math_f64_u10 { -2.505_210_681_484_312_335_936_8_e-8, 2.755_731_921_044_282_247_773_79_e-6, -0.000_198_412_698_412_046_454_654_947) - .mul_add(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); + .mla(s.0, F64x::splat(0.008_333_333_333_333_180_562_019_22)); x = ONE.add_checked( F64x::splat(-0.166_666_666_666_666_657_414_808).add_checked_as_doubled(u * s.0) * s, @@ -365,7 +365,7 @@ macro_rules! impl_math_f64_u10 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = (d * FRAC_2_PI).round(); ql = dql.roundi(); - let u = dql.mul_add(-PI_A2 * HALF, d); + let u = dql.mla(-PI_A2 * HALF, d); s = u.add_checked_as_doubled(dql * (-PI_B2) * HALF); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = (d * (FRAC_2_PI / D1_24X)).trunc(); @@ -373,7 +373,7 @@ macro_rules! 
impl_math_f64_u10 { let dql = (d * FRAC_2_PI - dqh).round(); ql = dql.roundi(); - let u = dqh.mul_add(-PI_A * HALF, d); + let u = dqh.mla(-PI_A * HALF, d); s = u.add_checked_as_doubled(dql * (-PI_A) * HALF); s += dqh * (-PI_B) * HALF; s += dql * (-PI_B) * HALF; @@ -396,11 +396,11 @@ macro_rules! impl_math_f64_u10 { s.0 = s.square_as_f(); let u = F64x::splat(1.589_383_072_832_289_373_285_11_e-10) - .mul_add(s.0, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) - .mul_add(s.0, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) - .mul_add(s.0, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) - .mul_add(s.0, F64x::splat(0.008_333_333_333_319_184_596_174_6)) - .mul_add(s.0, F64x::splat(-0.166_666_666_666_666_130_709_393)) + .mla(s.0, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) + .mla(s.0, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) + .mla(s.0, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) + .mla(s.0, F64x::splat(0.008_333_333_333_319_184_596_174_6)) + .mla(s.0, F64x::splat(-0.166_666_666_666_666_130_709_393)) * (s.0 * t.0); let x = t.add_checked(u); @@ -409,12 +409,12 @@ macro_rules! 
impl_math_f64_u10 { let rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F64x::splat(-1.136_153_502_390_974_295_315_23_e-11) - .mul_add(s.0, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) - .mul_add(s.0, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) - .mul_add(s.0, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) - .mul_add(s.0, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) - .mul_add(s.0, F64x::splat(0.041_666_666_666_666_551_959_206_2)) - .mul_add(s.0, F64x::splat(-0.5)); + .mla(s.0, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) + .mla(s.0, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) + .mla(s.0, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) + .mla(s.0, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) + .mla(s.0, F64x::splat(0.041_666_666_666_666_551_959_206_2)) + .mla(s.0, F64x::splat(-0.5)); let x = ONE.add_checked(s.0.mul_as_doubled(u)); let ry = F64x::from(x); @@ -448,7 +448,7 @@ macro_rules! impl_math_f64_u10 { pub fn sincos_deterministic(d: F64x) -> (F64x, F64x) { let dql = (d * FRAC_2_PI).round(); let mut ql = dql.roundi(); - let u = dql.mul_add(-PI_A2 * HALF, d); + let u = dql.mla(-PI_A2 * HALF, d); let mut s = u.add_checked_as_doubled(dql * (-PI_B2 * HALF)); let g = d.abs().simd_lt(TRIGRANGEMAX2); @@ -457,7 +457,7 @@ macro_rules! impl_math_f64_u10 { dqh *= D1_24X; let dql = (d * FRAC_2_PI - dqh).round(); - let u = dqh.mul_add(-PI_A * HALF, d); + let u = dqh.mla(-PI_A * HALF, d); let mut x = u.add_checked_as_doubled(dql * (-PI_A * HALF)); x += dqh * (-PI_B * HALF); x += dql * (-PI_B * HALF); @@ -488,11 +488,11 @@ macro_rules! 
impl_math_f64_u10 { s.0 = s.square_as_f(); let u = F64x::splat(1.589_383_072_832_289_373_285_11_e-10) - .mul_add(s.0, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) - .mul_add(s.0, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) - .mul_add(s.0, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) - .mul_add(s.0, F64x::splat(0.008_333_333_333_319_184_596_174_6)) - .mul_add(s.0, F64x::splat(-0.166_666_666_666_666_130_709_393)) + .mla(s.0, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) + .mla(s.0, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) + .mla(s.0, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) + .mla(s.0, F64x::splat(0.008_333_333_333_319_184_596_174_6)) + .mla(s.0, F64x::splat(-0.166_666_666_666_666_130_709_393)) * (s.0 * t.0); let x = t.add_checked(u); @@ -501,12 +501,12 @@ macro_rules! impl_math_f64_u10 { rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F64x::splat(-1.136_153_502_390_974_295_315_23_e-11) - .mul_add(s.0, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) - .mul_add(s.0, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) - .mul_add(s.0, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) - .mul_add(s.0, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) - .mul_add(s.0, F64x::splat(0.041_666_666_666_666_551_959_206_2)) - .mul_add(s.0, F64x::splat(-0.5)); + .mla(s.0, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) + .mla(s.0, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) + .mla(s.0, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) + .mla(s.0, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) + .mla(s.0, F64x::splat(0.041_666_666_666_666_551_959_206_2)) + .mla(s.0, F64x::splat(-0.5)); let x = ONE.add_checked(s.0.mul_as_doubled(u)); let ry = F64x::from(x); @@ -561,7 +561,7 @@ macro_rules! 
impl_math_f64_u10 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = (d * FRAC_2_PI).round(); ql = dql.roundi(); - let u = dql.mul_add(-PI_A2 * HALF, d); + let u = dql.mla(-PI_A2 * HALF, d); s = u.add_checked_as_doubled(dql * (-PI_B2) * HALF); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = (d * (FRAC_2_PI / D1_24X)).trunc(); @@ -571,7 +571,7 @@ macro_rules! impl_math_f64_u10 { let dql = F64x::from(s).trunc(); ql = dql.roundi(); - let u = dqh.mul_add(-PI_A * HALF, d); + let u = dqh.mla(-PI_A * HALF, d); s = u.add_checked_as_doubled(dql * (-PI_A) * HALF); s += dqh * (-PI_B) * HALF; s += dql * (-PI_B) * HALF; @@ -603,7 +603,7 @@ macro_rules! impl_math_f64_u10 { 0.218_694_872_818_553_549_8_e-1, 0.539_682_539_951_727_297_e-1, 0.133_333_333_333_050_058_1) - .mul_add(s.0, F64x::splat(0.333_333_333_333_334_369_5)); + .mla(s.0, F64x::splat(0.333_333_333_333_334_369_5)); let mut x = t.add_checked(s * t * u); let y = (-ONE).add_checked(x.square()); @@ -627,7 +627,7 @@ macro_rules! impl_math_f64_u10 { pub fn tan_deterministic(d: F64x) -> F64x { let dql = (d * FRAC_2_PI).round(); let mut ql = dql.roundi(); - let u = dql.mul_add(-PI_A2 * HALF, d); + let u = dql.mla(-PI_A2 * HALF, d); let mut s = u.add_checked_as_doubled(dql * (-PI_B2 * HALF)); let g = d.abs().simd_lt(TRIGRANGEMAX2); @@ -638,7 +638,7 @@ macro_rules! impl_math_f64_u10 { + (d.simd_lt(ZERO).select(F64x::splat(-0.5), HALF) - dqh); let dql = F64x::from(x).trunc(); - let u = dqh.mul_add(-PI_A * HALF, d); + let u = dqh.mla(-PI_A * HALF, d); x = u.add_checked_as_doubled(dql * (-PI_A * HALF)); x += dqh * (-PI_B * HALF); x += dql * (-PI_B * HALF); @@ -678,7 +678,7 @@ macro_rules! 
impl_math_f64_u10 { 0.218_694_872_818_553_549_8_e-1, 0.539_682_539_951_727_297_e-1, 0.133_333_333_333_050_058_1) - .mul_add(s.0, F64x::splat(0.333_333_333_333_334_369_5)); + .mla(s.0, F64x::splat(0.333_333_333_333_334_369_5)); let mut x = t.add_checked(s * t * u); let y = (-ONE).add_checked(x.square()); @@ -749,10 +749,10 @@ macro_rules! impl_math_f64_u10 { -0.066_662_088_477_879_549_719_418_2, 0.076_922_533_029_620_376_865_409_5, -0.090_909_044_277_338_757_478_190_7) - .mul_add(t.0, F64x::splat(0.111_111_108_376_896_236_538_123)) - .mul_add(t.0, F64x::splat(-0.142_857_142_756_268_568_062_339)) - .mul_add(t.0, F64x::splat(0.199_999_999_997_977_351_284_817)) - .mul_add(t.0, F64x::splat(-0.333_333_333_333_317_605_173_818)); + .mla(t.0, F64x::splat(0.111_111_108_376_896_236_538_123)) + .mla(t.0, F64x::splat(-0.142_857_142_756_268_568_062_339)) + .mla(t.0, F64x::splat(0.199_999_999_997_977_351_284_817)) + .mla(t.0, F64x::splat(-0.333_333_333_333_317_605_173_818)); t = s.add_checked(s * t * u); (Doubled::new( @@ -1038,7 +1038,7 @@ macro_rules! impl_math_f64_u10 { 0.285_714_285_511_134_091_777_308, 0.400_000_000_000_914_013_309_483, ) - .mul_add(x2.0, F64x::splat(0.666_666_666_666_664_853_302_393)); + .mla(x2.0, F64x::splat(0.666_666_666_666_664_853_302_393)); let mut s = Doubled::::splat(crate::f64::D_LN2) * e.cast(); s = s.add_checked(x.scale(F64x::splat(2.))); @@ -1157,12 +1157,12 @@ macro_rules! 
impl_math_f64_u10 { let x2 = x.0 * x.0; let t = F64x::splat(0.153_207_698_850_270_135_3) - .mul_add(x2, F64x::splat(0.152_562_905_100_342_871_6)) - .mul_add(x2, F64x::splat(0.181_860_593_293_778_599_6)) - .mul_add(x2, F64x::splat(0.222_221_451_983_938_000_9)) - .mul_add(x2, F64x::splat(0.285_714_293_279_429_931_7)) - .mul_add(x2, F64x::splat(0.399_999_999_963_525_199)) - .mul_add(x2, F64x::splat(0.666_666_666_666_733_354_1)); + .mla(x2, F64x::splat(0.152_562_905_100_342_871_6)) + .mla(x2, F64x::splat(0.181_860_593_293_778_599_6)) + .mla(x2, F64x::splat(0.222_221_451_983_938_000_9)) + .mla(x2, F64x::splat(0.285_714_293_279_429_931_7)) + .mla(x2, F64x::splat(0.399_999_999_963_525_199)) + .mla(x2, F64x::splat(0.666_666_666_666_733_354_1)); s = s.add_checked(x.scale(F64x::splat(2.))); s = s.add_checked(x2 * x.0 * t); @@ -1353,14 +1353,14 @@ macro_rules! impl_math_f64_u10 { dp1 = o.select(dp1 * (D1_32X * D1_32X), dp1); let mut e = ilogb2k(dp1 * F64x::splat(1. / 0.75)); let t = ldexp3k(ONE, -e); - m = d.mul_add(t, t - ONE); + m = d.mla(t, t - ONE); e = o.cast().select(e - Ix::splat(64), e); Doubled::::splat(crate::f64::D_LN2) * e.cast() }/* else { let e = vgetexp_vd_vd(dp1, F64x::splat(1. / 0.75)); e = e.simd_eq(INFINITY).select(F64x::splat(1024.), e); let t = ldexp3k(ONE, -e.roundi()); - m = d.mul_add(t, t - ONE); + m = d.mla(t, t - ONE); Doubled::::splat(crate::f64::D_LN2) * e }*/; @@ -1407,8 +1407,8 @@ macro_rules! impl_math_f64_u10 { let mut u = (d * R_LN2).round(); let q = u.roundi(); - let s = u.mul_add(-L2_U, d); - let s = u.mul_add(-L2_L, s); + let s = u.mla(-L2_U, d); + let s = u.mla(-L2_L, s); if cfg!(target_feature = "fma") { let s2 = s * s; @@ -1425,9 +1425,9 @@ macro_rules! impl_math_f64_u10 { 0.833_333_333_331_493_821_e-2, 0.416_666_666_666_660_259_8_e-1, 0.166_666_666_666_666_907_2) - .mul_add(s, HALF) - .mul_add(s, ONE) - .mul_add(s, ONE); + .mla(s, HALF) + .mla(s, ONE) + .mla(s, ONE); } else { let s2 = s * s; @@ -1445,9 +1445,9 @@ macro_rules! 
impl_math_f64_u10 { 0.008_333_333_333_316_527_216_649_84, 0.041_666_666_666_666_504_759_142_2, 0.166_666_666_666_666_851_703_837) - .mul_add(s, HALF); + .mla(s, HALF); - u = ONE + (s * s).mul_add(u, s); + u = ONE + (s * s).mla(u, s); } u = ldexp2k(u, q); @@ -1476,23 +1476,23 @@ macro_rules! impl_math_f64_u10 { let mut u = (d * LOG10_2).round(); let q = u.roundi(); - let s = u.mul_add(-L10_U, d); - let s = u.mul_add(-L10_L, s); + let s = u.mla(-L10_U, d); + let s = u.mla(-L10_L, s); u = F64x::splat(0.241_146_349_833_426_765_2_e-3) - .mul_add(s, F64x::splat(0.115_748_841_521_718_737_5_e-2)) - .mul_add(s, F64x::splat(0.501_397_554_678_973_365_9_e-2)) - .mul_add(s, F64x::splat(0.195_976_232_072_053_308_e-1)) - .mul_add(s, F64x::splat(0.680_893_639_944_678_413_8_e-1)) - .mul_add(s, F64x::splat(0.206_995_849_472_267_623_4)) - .mul_add(s, F64x::splat(0.539_382_929_205_853_622_9)) - .mul_add(s, F64x::splat(0.117_125_514_890_854_165_5_e+1)) - .mul_add(s, F64x::splat(0.203_467_859_229_343_295_3_e+1)) - .mul_add(s, F64x::splat(0.265_094_905_523_920_587_6_e+1)) - .mul_add(s, F64x::splat(0.230_258_509_299_404_590_1_e+1)); + .mla(s, F64x::splat(0.115_748_841_521_718_737_5_e-2)) + .mla(s, F64x::splat(0.501_397_554_678_973_365_9_e-2)) + .mla(s, F64x::splat(0.195_976_232_072_053_308_e-1)) + .mla(s, F64x::splat(0.680_893_639_944_678_413_8_e-1)) + .mla(s, F64x::splat(0.206_995_849_472_267_623_4)) + .mla(s, F64x::splat(0.539_382_929_205_853_622_9)) + .mla(s, F64x::splat(0.117_125_514_890_854_165_5_e+1)) + .mla(s, F64x::splat(0.203_467_859_229_343_295_3_e+1)) + .mla(s, F64x::splat(0.265_094_905_523_920_587_6_e+1)) + .mla(s, F64x::splat(0.230_258_509_299_404_590_1_e+1)); if cfg!(target_feature = "fma") { - u = u.mul_add(s, ONE); + u = u.mla(s, ONE); } else { u = ONE.add_checked(u.mul_as_doubled(s)).normalize().0; } @@ -1566,10 +1566,10 @@ macro_rules! 
impl_math_f64_u10 { 0.961_812_910_759_760_053_6_e-2, 0.555_041_086_648_204_659_6_e-1, 0.240_226_506_959_101_221_4) - .mul_add(s, F64x::splat(0.693_147_180_559_945_286_2)); + .mla(s, F64x::splat(0.693_147_180_559_945_286_2)); if cfg!(target_feature = "fma") { - u = u.mul_add(s, ONE); + u = u.mla(s, ONE); } else { u = ONE.add_checked(u.mul_as_doubled(s)).normalize().0; } @@ -1779,11 +1779,11 @@ macro_rules! impl_math_f64_u10 { d = d.abs(); let mut x = F64x::splat(-0.640_245_898_480_692_909_870_982) - .mul_add(d, F64x::splat(2.961_551_030_200_395_118_185_95)) - .mul_add(d, F64x::splat(-5.733_530_609_229_478_436_361_66)) - .mul_add(d, F64x::splat(6.039_903_689_894_587_479_614_07)) - .mul_add(d, F64x::splat(-3.858_419_355_104_449_888_216_32)) - .mul_add(d, F64x::splat(2.230_727_530_249_660_972_572_2)); + .mla(d, F64x::splat(2.961_551_030_200_395_118_185_95)) + .mla(d, F64x::splat(-5.733_530_609_229_478_436_361_66)) + .mla(d, F64x::splat(6.039_903_689_894_587_479_614_07)) + .mla(d, F64x::splat(-3.858_419_355_104_449_888_216_32)) + .mla(d, F64x::splat(2.230_727_530_249_660_972_572_2)); let mut y = x * x; y = y * y; @@ -1858,7 +1858,7 @@ macro_rules! impl_math_f64_u10 { 0.294_791_677_282_761_419_6_e+2, 0.707_481_600_086_460_927_9_e-7, ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1868,7 +1868,7 @@ macro_rules! impl_math_f64_u10 { 0.400_924_433_300_873_044_3_e-6, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1878,7 +1878,7 @@ macro_rules! impl_math_f64_u10 { 0.104_011_464_162_824_694_6_e-5, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1888,7 +1888,7 @@ macro_rules! impl_math_f64_u10 { 0.150_834_915_073_332_916_7_e-5, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1898,7 +1898,7 @@ macro_rules! impl_math_f64_u10 { 0.128_814_307_493_390_102_e-5, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1908,7 +1908,7 @@ macro_rules! impl_math_f64_u10 { 0.474_416_774_988_499_393_7_e-6, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1918,7 +1918,7 @@ macro_rules! 
impl_math_f64_u10 { -0.655_481_630_654_248_990_2_e-7, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1928,7 +1928,7 @@ macro_rules! impl_math_f64_u10 { -0.318_925_247_145_259_984_4_e-6, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1938,7 +1938,7 @@ macro_rules! impl_math_f64_u10 { 0.135_888_382_147_035_537_7_e-6, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1948,7 +1948,7 @@ macro_rules! impl_math_f64_u10 { -0.434_393_127_715_733_604_e-6, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1958,7 +1958,7 @@ macro_rules! impl_math_f64_u10 { 0.972_478_589_740_677_955_5_e-6, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1968,7 +1968,7 @@ macro_rules! impl_math_f64_u10 { -0.203_688_605_722_596_601_1_e-5, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1978,7 +1978,7 @@ macro_rules! impl_math_f64_u10 { 0.437_336_314_181_972_581_5_e-5, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1988,7 +1988,7 @@ macro_rules! impl_math_f64_u10 { -0.943_995_126_830_400_867_7_e-5, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -1998,7 +1998,7 @@ macro_rules! impl_math_f64_u10 { 0.205_072_703_037_638_980_4_e-4, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2008,7 +2008,7 @@ macro_rules! impl_math_f64_u10 { -0.449_262_018_343_118_401_8_e-4, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2018,7 +2018,7 @@ macro_rules! impl_math_f64_u10 { 0.994_575_123_607_187_593_1_e-4, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2028,7 +2028,7 @@ macro_rules! impl_math_f64_u10 { -0.223_154_759_903_498_319_6_e-3, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2038,7 +2038,7 @@ macro_rules! impl_math_f64_u10 { 0.509_669_524_710_196_762_2_e-3, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2048,7 +2048,7 @@ macro_rules! impl_math_f64_u10 { -0.119_275_391_166_788_697_1_e-2, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2058,7 +2058,7 @@ macro_rules! 
impl_math_f64_u10 { 0.289_051_033_074_221_031_e-2, ), ) - .mul_add( + .mla( t, F64x::select3( o2, @@ -2068,7 +2068,7 @@ macro_rules! impl_math_f64_u10 { -0.738_555_102_867_446_185_8_e-2, ), ) - .mul_add( + .mla( t, F64x::select3( o2, diff --git a/src/f64x/u15_impl.rs b/src/f64x/u15_impl.rs index 61a612f..7c15c65 100644 --- a/src/f64x/u15_impl.rs +++ b/src/f64x/u15_impl.rs @@ -29,7 +29,7 @@ macro_rules! impl_math_f64_u15 { -0.575_781_953_642_071_044_9_e+2, 0.233_424_972_963_870_131_9_e+5, ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -41,7 +41,7 @@ macro_rules! impl_math_f64_u15 { -0.469_566_104_493_310_776_9_e+5, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -53,7 +53,7 @@ macro_rules! impl_math_f64_u15 { 0.317_340_310_874_864_335_3_e+5, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -65,7 +65,7 @@ macro_rules! impl_math_f64_u15 { 0.324_298_278_695_957_378_7_e+4, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -77,7 +77,7 @@ macro_rules! impl_math_f64_u15 { -0.201_471_799_976_034_781_1_e+5, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -89,7 +89,7 @@ macro_rules! impl_math_f64_u15 { 0.155_400_697_096_711_828_6_e+5, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -101,7 +101,7 @@ macro_rules! impl_math_f64_u15 { -0.615_087_419_056_355_429_3_e+4, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -113,7 +113,7 @@ macro_rules! impl_math_f64_u15 { 0.124_004_776_563_481_573_2_e+4, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -125,7 +125,7 @@ macro_rules! impl_math_f64_u15 { -0.821_032_547_575_269_973_1_e+2, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -137,7 +137,7 @@ macro_rules! impl_math_f64_u15 { 0.324_244_388_083_993_087_e+2, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -149,7 +149,7 @@ macro_rules! impl_math_f64_u15 { -0.292_341_886_383_316_058_6_e+2, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -161,7 +161,7 @@ macro_rules! 
impl_math_f64_u15 { 0.345_746_173_281_438_307_1, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -173,7 +173,7 @@ macro_rules! impl_math_f64_u15 { 0.548_973_015_595_239_299_8_e+1, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -185,7 +185,7 @@ macro_rules! impl_math_f64_u15 { 0.155_993_413_225_129_413_4_e-2, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -197,7 +197,7 @@ macro_rules! impl_math_f64_u15 { -0.154_174_156_683_152_063_8_e+1, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -209,7 +209,7 @@ macro_rules! impl_math_f64_u15 { 0.282_315_223_055_836_418_6_e-5, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, @@ -221,7 +221,7 @@ macro_rules! impl_math_f64_u15 { 0.624_999_918_419_534_283_8, ), ) - .mul_add( + .mla( u.0, F64x::select4( o0, diff --git a/src/f64x/u35_impl.rs b/src/f64x/u35_impl.rs index 8f8c979..efe0004 100644 --- a/src/f64x/u35_impl.rs +++ b/src/f64x/u35_impl.rs @@ -13,21 +13,21 @@ macro_rules! impl_math_f64_u35 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = (d * FRAC_1_PI).round(); ql = dql.roundi(); - d = dql.mul_add(-PI_A2, d); - d = dql.mul_add(-PI_B2, d); + d = dql.mla(-PI_A2, d); + d = dql.mla(-PI_B2, d); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = (d * (FRAC_1_PI / D1_24X)).trunc(); let dqh = dqh * D1_24X; let dql = d.mul_sub(FRAC_1_PI, dqh).round(); ql = dql.roundi(); - d = dqh.mul_add(-PI_A, d); - d = dql.mul_add(-PI_A, d); - d = dqh.mul_add(-PI_B, d); - d = dql.mul_add(-PI_B, d); - d = dqh.mul_add(-PI_C, d); - d = dql.mul_add(-PI_C, d); - d = (dqh + dql).mul_add(-PI_D, d); + d = dqh.mla(-PI_A, d); + d = dql.mla(-PI_A, d); + d = dqh.mla(-PI_B, d); + d = dql.mla(-PI_B, d); + d = dqh.mla(-PI_C, d); + d = dql.mla(-PI_C, d); + d = (dqh + dql).mla(-PI_D, d); } else { let (mut ddidd, ddii) = rempi(d); ql = ddii & Ix::splat(3); @@ -64,7 +64,7 @@ macro_rules! 
impl_math_f64_u35 { 2.755_731_922_391_987_476_304_16_e-6, -0.000_198_412_698_412_696_162_806_809, 0.008_333_333_333_333_329_748_238_15) - .mul_add(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); + .mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); u = s * (u * d) + d; @@ -85,8 +85,8 @@ macro_rules! impl_math_f64_u35 { let dql = (d * FRAC_1_PI).round(); let mut ql = dql.roundi(); - d = dql.mul_add(-PI_A2, d); - d = dql.mul_add(-PI_B2, d); + d = dql.mla(-PI_A2, d); + d = dql.mla(-PI_B2, d); let g = r.abs().simd_lt(TRIGRANGEMAX2); if !g.all() { @@ -94,13 +94,13 @@ macro_rules! impl_math_f64_u35 { dqh *= D1_24X; let dql = r.mul_sub(FRAC_1_PI, dqh).round(); - let mut u = dqh.mul_add(-PI_A, r); - u = dql.mul_add(-PI_A, u); - u = dqh.mul_add(-PI_B, u); - u = dql.mul_add(-PI_B, u); - u = dqh.mul_add(-PI_C, u); - u = dql.mul_add(-PI_C, u); - u = (dqh + dql).mul_add(-PI_D, u); + let mut u = dqh.mla(-PI_A, r); + u = dql.mla(-PI_A, u); + u = dqh.mla(-PI_B, u); + u = dql.mla(-PI_B, u); + u = dqh.mla(-PI_C, u); + u = dql.mla(-PI_C, u); + u = (dqh + dql).mla(-PI_D, u); ql = g.cast().select(ql, dql.roundi()); d = g.select(d, u); @@ -145,7 +145,7 @@ macro_rules! impl_math_f64_u35 { 2.755_731_922_391_987_476_304_16_e-6, -0.000_198_412_698_412_696_162_806_809, 0.008_333_333_333_333_329_748_238_15) - .mul_add(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); + .mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); u = s * (u * d) + d; @@ -178,26 +178,26 @@ macro_rules! 
impl_math_f64_u35 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = - F64x::splat(2.).mul_add(d.mul_add(FRAC_1_PI, F64x::splat(-0.5)).round(), ONE); + F64x::splat(2.).mla(d.mla(FRAC_1_PI, F64x::splat(-0.5)).round(), ONE); ql = dql.roundi(); - d = dql.mul_add(-PI_A2 * HALF, d); - d = dql.mul_add(-PI_B2 * HALF, d); + d = dql.mla(-PI_A2 * HALF, d); + d = dql.mla(-PI_B2 * HALF, d); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = d - .mul_add(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X) + .mla(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X) .trunc(); - ql = (d * FRAC_1_PI + dqh.mul_add(-D1_23X, F64x::splat(-0.5))).roundi(); + ql = (d * FRAC_1_PI + dqh.mla(-D1_23X, F64x::splat(-0.5))).roundi(); let dqh = dqh * D1_24X; ql = ql + ql + Ix::splat(1); let dql: F64x = ql.cast(); - d = dqh.mul_add(-PI_A * HALF, d); - d = dql.mul_add(-PI_A * HALF, d); - d = dqh.mul_add(-PI_B * HALF, d); - d = dql.mul_add(-PI_B * HALF, d); - d = dqh.mul_add(-PI_C * HALF, d); - d = dql.mul_add(-PI_C * HALF, d); - d = (dqh + dql).mul_add(-PI_D * HALF, d); + d = dqh.mla(-PI_A * HALF, d); + d = dql.mla(-PI_A * HALF, d); + d = dqh.mla(-PI_B * HALF, d); + d = dql.mla(-PI_B * HALF, d); + d = dqh.mla(-PI_C * HALF, d); + d = dql.mla(-PI_C * HALF, d); + d = (dqh + dql).mla(-PI_D * HALF, d); } else { let (mut ddidd, ddii) = rempi(d); ql = ddii & Ix::splat(3); @@ -235,7 +235,7 @@ macro_rules! impl_math_f64_u35 { 2.755_731_922_391_987_476_304_16_e-6, -0.000_198_412_698_412_696_162_806_809, 0.008_333_333_333_333_329_748_238_15) - .mul_add(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); + .mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); s * (u * d) + d } @@ -250,25 +250,25 @@ macro_rules! 
impl_math_f64_u35 { let r = d; let g = d.abs().simd_lt(TRIGRANGEMAX2); - let dql = F64x::splat(2.).mul_add((d.mul_add(FRAC_1_PI, F64x::splat(-0.5))).round(), ONE); + let dql = F64x::splat(2.).mla((d.mla(FRAC_1_PI, F64x::splat(-0.5))).round(), ONE); let mut ql = dql.roundi(); - d = dql.mul_add(-PI_A2 * HALF, d); - d = dql.mul_add(-PI_B2 * HALF, d); + d = dql.mla(-PI_A2 * HALF, d); + d = dql.mla(-PI_B2 * HALF, d); if !g.all() { - let mut dqh = (r.mul_add(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X)).trunc(); - let mut ql2 = (r * FRAC_1_PI + dqh.mul_add(-D1_23X, F64x::splat(-0.5))).roundi(); + let mut dqh = (r.mla(FRAC_1_PI / D1_23X, -FRAC_1_PI / D1_24X)).trunc(); + let mut ql2 = (r * FRAC_1_PI + dqh.mla(-D1_23X, F64x::splat(-0.5))).roundi(); dqh *= D1_24X; ql2 = ql2 + ql2 + Ix::splat(1); let dql: F64x = ql2.cast(); - let mut u = dqh.mul_add(-PI_A * HALF, r); - u = dql.mul_add(-PI_A * HALF, u); - u = dqh.mul_add(-PI_B * HALF, u); - u = dql.mul_add(-PI_B * HALF, u); - u = dqh.mul_add(-PI_C * HALF, u); - u = dql.mul_add(-PI_C * HALF, u); - u = (dqh + dql).mul_add(-PI_D * HALF, u); + let mut u = dqh.mla(-PI_A * HALF, r); + u = dql.mla(-PI_A * HALF, u); + u = dqh.mla(-PI_B * HALF, u); + u = dql.mla(-PI_B * HALF, u); + u = dqh.mla(-PI_C * HALF, u); + u = dql.mla(-PI_C * HALF, u); + u = (dqh + dql).mla(-PI_D * HALF, u); ql = g.cast().select(ql, ql2); d = g.select(d, u); @@ -314,7 +314,7 @@ macro_rules! impl_math_f64_u35 { 2.755_731_922_391_987_476_304_16_e-6, -0.000_198_412_698_412_696_162_806_809, 0.008_333_333_333_333_329_748_238_15) - .mul_add(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); + .mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808)); s * (u * d) + d } @@ -348,21 +348,21 @@ macro_rules! 
impl_math_f64_u35 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = (d * FRAC_2_PI).round(); ql = dql.roundi(); - s = dql.mul_add(-PI_A2 * HALF, d); - s = dql.mul_add(-PI_B2 * HALF, s); + s = dql.mla(-PI_A2 * HALF, d); + s = dql.mla(-PI_B2 * HALF, s); } else if d.abs().simd_lt(TRIGRANGEMAX).all() { let dqh = (d * (FRAC_2_PI / D1_24X)).trunc(); let dqh = dqh * D1_24X; let dql = (d * FRAC_2_PI - dqh).round(); ql = dql.roundi(); - s = dqh.mul_add(-PI_A * HALF, d); - s = dql.mul_add(-PI_A * HALF, s); - s = dqh.mul_add(-PI_B * HALF, s); - s = dql.mul_add(-PI_B * HALF, s); - s = dqh.mul_add(-PI_C * HALF, s); - s = dql.mul_add(-PI_C * HALF, s); - s = (dqh + dql).mul_add(-PI_D * HALF, s); + s = dqh.mla(-PI_A * HALF, d); + s = dql.mla(-PI_A * HALF, s); + s = dqh.mla(-PI_B * HALF, s); + s = dql.mla(-PI_B * HALF, s); + s = dqh.mla(-PI_C * HALF, s); + s = dql.mla(-PI_C * HALF, s); + s = (dqh + dql).mla(-PI_D * HALF, s); } else { let (ddidd, ddii) = rempi(d); ql = ddii; @@ -375,24 +375,24 @@ macro_rules! 
impl_math_f64_u35 { s = s * s; let u = F64x::splat(1.589_383_072_832_289_373_285_11_e-10) - .mul_add(s, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) - .mul_add(s, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) - .mul_add(s, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) - .mul_add(s, F64x::splat(0.008_333_333_333_319_184_596_174_6)) - .mul_add(s, F64x::splat(-0.166_666_666_666_666_130_709_393)); + .mla(s, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) + .mla(s, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) + .mla(s, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) + .mla(s, F64x::splat(0.008_333_333_333_319_184_596_174_6)) + .mla(s, F64x::splat(-0.166_666_666_666_666_130_709_393)); - let rx = (u * s).mul_add(t, t); + let rx = (u * s).mla(t, t); let rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F64x::splat(-1.136_153_502_390_974_295_315_23_e-11) - .mul_add(s, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) - .mul_add(s, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) - .mul_add(s, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) - .mul_add(s, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) - .mul_add(s, F64x::splat(0.041_666_666_666_666_551_959_206_2)) - .mul_add(s, F64x::splat(-0.5)); + .mla(s, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) + .mla(s, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) + .mla(s, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) + .mla(s, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) + .mla(s, F64x::splat(0.041_666_666_666_666_551_959_206_2)) + .mla(s, F64x::splat(-0.5)); - let ry = s.mul_add(u, ONE); + let ry = s.mla(u, ONE); let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(0)).cast(); let mut rsin = o.select(rx, ry); @@ -422,8 +422,8 @@ macro_rules! 
impl_math_f64_u35 { let dql = (s * FRAC_2_PI).round(); let mut ql = dql.roundi(); - s = dql.mul_add(-PI_A2 * HALF, s); - s = dql.mul_add(-PI_B2 * HALF, s); + s = dql.mla(-PI_A2 * HALF, s); + s = dql.mla(-PI_B2 * HALF, s); let g = d.abs().simd_lt(TRIGRANGEMAX2); if !g.all() { @@ -431,13 +431,13 @@ macro_rules! impl_math_f64_u35 { dqh *= D1_24X; let dql = (d * FRAC_2_PI - dqh).round(); - let mut u = dqh.mul_add(-PI_A * HALF, d); - u = dql.mul_add(-PI_A * HALF, u); - u = dqh.mul_add(-PI_B * HALF, u); - u = dql.mul_add(-PI_B * HALF, u); - u = dqh.mul_add(-PI_C * HALF, u); - u = dql.mul_add(-PI_C * HALF, u); - u = (dqh + dql).mul_add(-PI_D * HALF, u); + let mut u = dqh.mla(-PI_A * HALF, d); + u = dql.mla(-PI_A * HALF, u); + u = dqh.mla(-PI_B * HALF, u); + u = dql.mla(-PI_B * HALF, u); + u = dqh.mla(-PI_C * HALF, u); + u = dql.mla(-PI_C * HALF, u); + u = (dqh + dql).mla(-PI_D * HALF, u); ql = g.cast().select(ql, dql.roundi()); s = g.select(s, u); @@ -458,24 +458,24 @@ macro_rules! impl_math_f64_u35 { s = s * s; let u = F64x::splat(1.589_383_072_832_289_373_285_11_e-10) - .mul_add(s, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) - .mul_add(s, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) - .mul_add(s, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) - .mul_add(s, F64x::splat(0.008_333_333_333_319_184_596_174_6)) - .mul_add(s, F64x::splat(-0.166_666_666_666_666_130_709_393)); + .mla(s, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8)) + .mla(s, F64x::splat(2.755_731_317_768_463_605_125_47_e-6)) + .mla(s, F64x::splat(-0.000_198_412_698_278_911_770_864_914)) + .mla(s, F64x::splat(0.008_333_333_333_319_184_596_174_6)) + .mla(s, F64x::splat(-0.166_666_666_666_666_130_709_393)); - let mut rx = (u * s).mul_add(t, t); + let mut rx = (u * s).mla(t, t); rx = d.is_neg_zero().select(NEG_ZERO, rx); let u = F64x::splat(-1.136_153_502_390_974_295_315_23_e-11) - .mul_add(s, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) - .mul_add(s, 
F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) - .mul_add(s, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) - .mul_add(s, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) - .mul_add(s, F64x::splat(0.041_666_666_666_666_551_959_206_2)) - .mul_add(s, F64x::splat(-0.5)); + .mla(s, F64x::splat(2.087_574_712_070_400_554_793_66_e-9)) + .mla(s, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7)) + .mla(s, F64x::splat(2.480_158_728_900_018_673_119_15_e-5)) + .mla(s, F64x::splat(-0.001_388_888_888_887_140_192_823_29)) + .mla(s, F64x::splat(0.041_666_666_666_666_551_959_206_2)) + .mla(s, F64x::splat(-0.5)); - let ry = s.mul_add(u, ONE); + let ry = s.mla(u, ONE); let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(0)).cast(); let mut rsin = o.select(rx, ry); @@ -526,21 +526,21 @@ macro_rules! impl_math_f64_u35 { if d.abs().simd_lt(TRIGRANGEMAX2).all() { let dql = (d * FRAC_2_PI).round(); ql = dql.roundi(); - x = dql.mul_add(-PI_A2 * HALF, d); - x = dql.mul_add(-PI_B2 * HALF, x); + x = dql.mla(-PI_A2 * HALF, d); + x = dql.mla(-PI_B2 * HALF, x); } else if d.abs().simd_lt(F64x::splat(1e+6)).all() { let dqh = (d * (FRAC_2_PI / D1_24X)).trunc(); let dqh = dqh * D1_24X; let dql = (d * FRAC_2_PI - dqh).round(); ql = dql.roundi(); - x = dqh.mul_add(-PI_A * HALF, d); - x = dql.mul_add(-PI_A * HALF, x); - x = dqh.mul_add(-PI_B * HALF, x); - x = dql.mul_add(-PI_B * HALF, x); - x = dqh.mul_add(-PI_C * HALF, x); - x = dql.mul_add(-PI_C * HALF, x); - x = (dqh + dql).mul_add(-PI_D * HALF, x); + x = dqh.mla(-PI_A * HALF, d); + x = dql.mla(-PI_A * HALF, x); + x = dqh.mla(-PI_B * HALF, x); + x = dql.mla(-PI_B * HALF, x); + x = dqh.mla(-PI_C * HALF, x); + x = dql.mla(-PI_C * HALF, x); + x = (dqh + dql).mla(-PI_D * HALF, x); } else { let (ddidd, ddii) = rempi(d); ql = ddii; @@ -564,10 +564,10 @@ macro_rules! 
impl_math_f64_u35 { 0.218_694_872_818_553_549_8_e-1, 0.539_682_539_951_727_297_e-1, 0.133_333_333_333_050_058_1) - .mul_add(s, F64x::splat(0.333_333_333_333_334_369_5)); - u = s.mul_add(u * x, x); + .mla(s, F64x::splat(0.333_333_333_333_334_369_5)); + u = s.mla(u * x, x); - let y = u.mul_add(u, -ONE); + let y = u.mla(u, -ONE); x = u * F64x::splat(-2.); let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(1)).cast(); @@ -585,8 +585,8 @@ macro_rules! impl_math_f64_u35 { pub fn tan_deterministic(d: F64x) -> F64x { let dql = (d * FRAC_2_PI).round(); let mut ql = dql.roundi(); - let mut s = dql.mul_add(-PI_A2 * HALF, d); - s = dql.mul_add(-PI_B2 * HALF, s); + let mut s = dql.mla(-PI_A2 * HALF, d); + s = dql.mla(-PI_B2 * HALF, s); let g = d.abs().simd_lt(TRIGRANGEMAX2); if !g.all() { @@ -594,13 +594,13 @@ macro_rules! impl_math_f64_u35 { dqh *= D1_24X; let dql = (d * FRAC_2_PI - dqh).round(); - let mut u = dqh.mul_add(-PI_A * HALF, d); - u = dql.mul_add(-PI_A * HALF, u); - u = dqh.mul_add(-PI_B * HALF, u); - u = dql.mul_add(-PI_B * HALF, u); - u = dqh.mul_add(-PI_C * HALF, u); - u = dql.mul_add(-PI_C * HALF, u); - u = (dqh + dql).mul_add(-PI_D * HALF, u); + let mut u = dqh.mla(-PI_A * HALF, d); + u = dql.mla(-PI_A * HALF, u); + u = dqh.mla(-PI_B * HALF, u); + u = dql.mla(-PI_B * HALF, u); + u = dqh.mla(-PI_C * HALF, u); + u = dql.mla(-PI_C * HALF, u); + u = (dqh + dql).mla(-PI_D * HALF, u); ql = g.cast().select(ql, dql.roundi()); s = g.select(s, u); @@ -632,10 +632,10 @@ macro_rules! impl_math_f64_u35 { 0.218_694_872_818_553_549_8_e-1, 0.539_682_539_951_727_297_e-1, 0.133_333_333_333_050_058_1) - .mul_add(s, F64x::splat(0.333_333_333_333_334_369_5)); - u = s.mul_add(u * x, x); + .mla(s, F64x::splat(0.333_333_333_333_334_369_5)); + u = s.mla(u * x, x); - let y = u.mul_add(u, -ONE); + let y = u.mla(u, -ONE); let x = u * F64x::splat(-2.); let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(1)).cast(); @@ -678,23 +678,23 @@ macro_rules! 
impl_math_f64_u35 { // let u = F64x::splat(0.688_063_889_476_606_013_6_e-11) - .mul_add(s, F64x::splat(-0.175_715_956_454_231_019_9_e-8)) - .mul_add(s, F64x::splat(0.313_361_632_725_786_731_1_e-6)) - .mul_add(s, F64x::splat(-0.365_762_041_638_848_645_2_e-4)) - .mul_add(s, F64x::splat(0.249_039_457_018_993_210_3_e-2)) - .mul_add(s, F64x::splat(-0.807_455_121_882_805_632_e-1)) - .mul_add(s, F64x::splat(0.785_398_163_397_448_279)); + .mla(s, F64x::splat(-0.175_715_956_454_231_019_9_e-8)) + .mla(s, F64x::splat(0.313_361_632_725_786_731_1_e-6)) + .mla(s, F64x::splat(-0.365_762_041_638_848_645_2_e-4)) + .mla(s, F64x::splat(0.249_039_457_018_993_210_3_e-2)) + .mla(s, F64x::splat(-0.807_455_121_882_805_632_e-1)) + .mla(s, F64x::splat(0.785_398_163_397_448_279)); let rx = u * t; let u = F64x::splat(-0.386_014_121_368_379_435_2_e-12) - .mul_add(s, F64x::splat(0.115_005_788_802_968_141_5_e-9)) - .mul_add(s, F64x::splat(-0.246_113_649_300_666_355_3_e-7)) - .mul_add(s, F64x::splat(0.359_086_044_662_351_671_3_e-5)) - .mul_add(s, F64x::splat(-0.325_991_886_926_943_594_2_e-3)) - .mul_add(s, F64x::splat(0.158_543_442_438_154_116_9_e-1)) - .mul_add(s, F64x::splat(-0.308_425_137_534_042_437_3)) - .mul_add(s, ONE); + .mla(s, F64x::splat(0.115_005_788_802_968_141_5_e-9)) + .mla(s, F64x::splat(-0.246_113_649_300_666_355_3_e-7)) + .mla(s, F64x::splat(0.359_086_044_662_351_671_3_e-5)) + .mla(s, F64x::splat(-0.325_991_886_926_943_594_2_e-3)) + .mla(s, F64x::splat(0.158_543_442_438_154_116_9_e-1)) + .mla(s, F64x::splat(-0.308_425_137_534_042_437_3)) + .mla(s, ONE); let ry = u; @@ -782,8 +782,8 @@ macro_rules! impl_math_f64_u35 { -0.333_333_333_333_311_110_369_124, ); - t = s.mul_add(t * u, s); - q.cast::().mul_add(FRAC_PI_2, t) + t = s.mla(t * u, s); + q.cast::().mla(FRAC_PI_2, t) } /// Arc tangent function of two variables @@ -849,9 +849,9 @@ macro_rules! 
impl_math_f64_u35 { 0.750_000_000_037_858_161_1_e-1, 0.166_666_666_666_649_754_3); - u = u.mul_add(x * x2, x); + u = u.mla(x * x2, x); - let r = o.select(u, u.mul_add(F64x::splat(-2.), FRAC_PI_2)); + let r = o.select(u, u.mla(F64x::splat(-2.), FRAC_PI_2)); r.mul_sign(d) } @@ -958,7 +958,7 @@ macro_rules! impl_math_f64_u35 { 0.199_999_999_996_591_265_594_148, -0.333_333_333_333_311_110_369_124); - t = s.mul_add(t * u, s); + t = s.mla(t * u, s); t = (q & Ix::splat(1)).simd_eq(Ix::splat(1)).cast().select(FRAC_PI_2 - t, t); t = F64x::from_bits( @@ -989,8 +989,8 @@ macro_rules! impl_math_f64_u35 { let mut u = (d * R_LN2).round(); let q = u.roundi(); - let s = u.mul_add(-L2_U, d); - let s = u.mul_add(-L2_L, s); + let s = u.mla(-L2_U, d); + let s = u.mla(-L2_L, s); let s2 = s * s; let s4 = s2 * s2; @@ -1013,7 +1013,7 @@ macro_rules! impl_math_f64_u35 { 0.166_666_666_666_666_851_703_837, ); - u = s2.mul_add(HALF, s2 * s * u) + s; + u = s2.mla(HALF, s2 * s * u) + s; q.simd_eq(Ix::splat(0)).cast().select(u, ldexp2k(u + ONE, q) - ONE) } @@ -1053,7 +1053,7 @@ macro_rules! impl_math_f64_u35 { /// or a correct value with `3.5 ULP` error bound is returned. pub fn cosh(x: F64x) -> F64x { let e = u10::exp(x.abs()); - let mut y = HALF.mul_add(e, HALF / e); + let mut y = HALF.mla(e, HALF / e); y = (x.abs().simd_gt(F64x::splat(709.)) | y.is_nan()).select(INFINITY, y); F64x::from_bits(x.is_nan().to_int().cast() | y.to_bits()) @@ -1132,15 +1132,15 @@ macro_rules! 
impl_math_f64_u35 { 0.666_666_666_666_777_874_006_3); /*if !cfg!(feature = "enable_avx512f") && !cfg!(feature = "enable_avx512fnofma") {*/ - x = x.mul_add(F64x::splat(2.), F64x::splat(0.693_147_180_559_945_286_226_764) * ef); - x = x3.mul_add(t, x); + x = x.mla(F64x::splat(2.), F64x::splat(0.693_147_180_559_945_286_226_764) * ef); + x = x3.mla(t, x); x = d.simd_eq(INFINITY).select(INFINITY, x); x = (d.simd_lt(ZERO) | d.is_nan()).select(NAN, x); d.simd_eq(ZERO).select(NEG_INFINITY, x) /* } else { - x = x.mul_add(F64x::splat(2.), F64x::splat(0.693_147_180_559_945_286_226_764) * ef); - x = x3.mul_add(t, x); + x = x.mla(F64x::splat(2.), F64x::splat(0.693_147_180_559_945_286_226_764) * ef); + x = x3.mla(t, x); vfixup_vd_vd_vd_vi2_i(x, d, I64x::splat((5 << (5 * 4))), 0) }*/ } @@ -1174,12 +1174,12 @@ macro_rules! impl_math_f64_u35 { let x2 = x * x; let t = F64x::splat(0.221_194_175_045_608_149) - .mul_add(x2, F64x::splat(0.220_076_869_315_227_768_9)) - .mul_add(x2, F64x::splat(0.262_370_805_748_851_465_6)) - .mul_add(x2, F64x::splat(0.320_597_747_794_449_550_2)) - .mul_add(x2, F64x::splat(0.412_198_594_548_532_470_9)) - .mul_add(x2, F64x::splat(0.577_078_016_299_705_898_2)) - .mul_add(x2, F64x::splat(0.961_796_693_926_080_914_49)); + .mla(x2, F64x::splat(0.220_076_869_315_227_768_9)) + .mla(x2, F64x::splat(0.262_370_805_748_851_465_6)) + .mla(x2, F64x::splat(0.320_597_747_794_449_550_2)) + .mla(x2, F64x::splat(0.412_198_594_548_532_470_9)) + .mla(x2, F64x::splat(0.577_078_016_299_705_898_2)) + .mla(x2, F64x::splat(0.961_796_693_926_080_914_49)); let s = //if !cfg!(feature = "enable_avx512f") && !cfg!(feature = "enable_avx512fnofma") { @@ -1188,7 +1188,7 @@ macro_rules! 
impl_math_f64_u35 { e.add_checked(x.mul_as_doubled(F64x::splat(2.885_390_081_777_926_774))) */ }; - let mut r = t.mul_add(x * x2, F64x::from(s)); + let mut r = t.mla(x * x2, F64x::from(s)); //if !cfg!(feature = "enable_avx512f") && !cfg!(feature = "enable_avx512fnofma") { r = d.simd_eq(INFINITY).select(INFINITY, r); @@ -1218,8 +1218,8 @@ macro_rules! impl_math_f64_u35 { let mut u = (d * LOG10_2).round(); let q = u.roundi(); - let mut s = u.mul_add(-L10_U, d); - s = u.mul_add(-L10_L, s); + let mut s = u.mla(-L10_U, d); + s = u.mla(-L10_L, s); let s2 = s * s; let s4 = s2 * s2; @@ -1242,7 +1242,7 @@ macro_rules! impl_math_f64_u35 { 0.230_258_509_299_404_590_1_e+1, ); - u = u.mul_add(s, ONE); + u = u.mla(s, ONE); u = ldexp2k(u, q); @@ -1290,9 +1290,9 @@ macro_rules! impl_math_f64_u35 { 0.555_041_086_648_204_659_6_e-1, 0.240_226_506_959_101_221_4, ); - u = u.mul_add(s, F64x::splat(0.693_147_180_559_945_286_2)); + u = u.mla(s, F64x::splat(0.693_147_180_559_945_286_2)); - u = u.mul_add(s, ONE); + u = u.mla(s, ONE); u = ldexp2k(u, q); @@ -1344,17 +1344,17 @@ macro_rules! impl_math_f64_u35 { d = d.abs(); let mut x = F64x::splat(-0.640_245_898_480_692_909_870_982) - .mul_add(d, F64x::splat(2.961_551_030_200_395_118_185_95)) - .mul_add(d, F64x::splat(-5.733_530_609_229_478_436_361_66)) - .mul_add(d, F64x::splat(6.039_903_689_894_587_479_614_07)) - .mul_add(d, F64x::splat(-3.858_419_355_104_449_888_216_32)) - .mul_add(d, F64x::splat(2.230_727_530_249_660_972_572_2)); + .mla(d, F64x::splat(2.961_551_030_200_395_118_185_95)) + .mla(d, F64x::splat(-5.733_530_609_229_478_436_361_66)) + .mla(d, F64x::splat(6.039_903_689_894_587_479_614_07)) + .mla(d, F64x::splat(-3.858_419_355_104_449_888_216_32)) + .mla(d, F64x::splat(2.230_727_530_249_660_972_572_2)); let mut y = x * x; y = y * y; x -= d.mul_sub(y, x) * F64x::splat(1. / 3.); y = d * x * x; - y = (y - F64x::splat(2. / 3.) * y * y.mul_add(x, F64x::splat(-1.))) * q; + y = (y - F64x::splat(2. / 3.) 
* y * y.mla(x, F64x::splat(-1.))) * q; /*if cfg!(feature = "enable_avx512f") || cfg!(feature = "enable_avx512fnofma") { y = s.is_infinite().select(INFINITY.mul_sign(s), y); @@ -1386,7 +1386,7 @@ macro_rules! impl_math_f64_u35 { let max = x.simd_max(y); let t = min / max; - let mut ret = max * t.mul_add(t, ONE).sqrt(); + let mut ret = max * t.mla(t, ONE).sqrt(); ret = min.simd_eq(ZERO).select(max, ret); ret = (x.is_nan() | y.is_nan()).select(NAN, ret); (x.simd_eq(INFINITY) | y.simd_eq(INFINITY)).select(INFINITY, ret)