From 72d3e499371f52add088fcca25e0fe01fa72e123 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Wed, 16 Oct 2024 22:08:09 +0100 Subject: [PATCH] Optimize some floating point operations, but maintain parity with Java --- pumpkin-core/src/math/mod.rs | 2 +- pumpkin-core/src/random/gaussian.rs | 4 ++-- pumpkin-core/src/random/mod.rs | 2 +- pumpkin-world/src/world_gen/generic_generator.rs | 2 +- pumpkin-world/src/world_gen/noise/density/end.rs | 7 ++++--- .../src/world_gen/noise/density/noise.rs | 6 +++--- .../src/world_gen/noise/density/spline.rs | 15 +++++++++------ .../world_gen/noise/density/terrain_helpers.rs | 6 +++--- pumpkin-world/src/world_gen/noise/mod.rs | 2 +- pumpkin-world/src/world_gen/noise/perlin.rs | 4 +++- pumpkin-world/src/world_gen/noise/simplex.rs | 8 ++++---- 11 files changed, 32 insertions(+), 26 deletions(-) diff --git a/pumpkin-core/src/math/mod.rs b/pumpkin-core/src/math/mod.rs index de67a853..e1f7149f 100644 --- a/pumpkin-core/src/math/mod.rs +++ b/pumpkin-core/src/math/mod.rs @@ -18,7 +18,7 @@ pub fn wrap_degrees(var: f32) -> f32 { } pub fn squared_magnitude(a: f64, b: f64, c: f64) -> f64 { - a * a + b * b + c * c + c.mul_add(c, a.mul_add(a, b * b)) } pub fn magnitude(a: f64, b: f64, c: f64) -> f64 { diff --git a/pumpkin-core/src/random/gaussian.rs b/pumpkin-core/src/random/gaussian.rs index 35d4095c..36f3c051 100644 --- a/pumpkin-core/src/random/gaussian.rs +++ b/pumpkin-core/src/random/gaussian.rs @@ -11,8 +11,8 @@ pub trait GaussianGenerator: RandomImpl { gaussian } else { loop { - let d = 2f64 * self.next_f64() - 1f64; - let e = 2f64 * self.next_f64() - 1f64; + let d = 2f64.mul_add(self.next_f64(), -1f64); + let e = 2f64.mul_add(self.next_f64(), -1f64); let f = d * d + e * e; if f < 1f64 && f != 0f64 { diff --git a/pumpkin-core/src/random/mod.rs b/pumpkin-core/src/random/mod.rs index c8737213..2c3d4112 100644 --- a/pumpkin-core/src/random/mod.rs +++ b/pumpkin-core/src/random/mod.rs @@ -109,7 +109,7 @@ impl RandomGenerator { #[inline] pub fn next_triangular(&mut self, mode: f64, deviation: f64) -> f64 { - mode + deviation * (self.next_f64() - self.next_f64()) + deviation.mul_add(self.next_f64() - self.next_f64(), mode) } #[inline] diff --git a/pumpkin-world/src/world_gen/generic_generator.rs b/pumpkin-world/src/world_gen/generic_generator.rs index 1f0a1765..4888e536 100644 --- a/pumpkin-world/src/world_gen/generic_generator.rs +++ b/pumpkin-world/src/world_gen/generic_generator.rs @@ -40,7 +40,7 @@ impl WorldGenerator for GenericGen let base_height = 64.0; let height_variation = 16.0; - let chunk_height = (noise_value * height_variation + base_height) as i32; + let chunk_height = noise_value.mul_add(height_variation, base_height) as i32; for x in 0..16u8 { for z in 0..16u8 { diff --git a/pumpkin-world/src/world_gen/noise/density/end.rs b/pumpkin-world/src/world_gen/noise/density/end.rs index fd22fb25..f546701a 100644 --- a/pumpkin-world/src/world_gen/noise/density/end.rs +++ b/pumpkin-world/src/world_gen/noise/density/end.rs @@ -29,7 +29,7 @@ impl EndIslandFunction { let k = x % 2; let l = z % 2; - let f = 100f32 - ((x * x + z * z) as f32).sqrt() * 8f32; + let f = ((x * x + z * z) as f32).sqrt().mul_add(-8f32, 100f32); let mut f = f.clamp(-100f32, 80f32); for m in -12..=12 { @@ -40,10 +40,11 @@ impl EndIslandFunction { if (o * o + p * p) > 4096i64 && sampler.sample_2d(o as f64, p as f64) < -0.9f32 as f64 { - let g = ((o as f32).abs() * 3439f32 + (p as f32).abs() * 147f32) % 13f32 + 9f32; + let g = + (o as f32).abs().mul_add(3439f32, (p as f32).abs() * 147f32) % 13f32 + 9f32; let h = (k - m * 2) as f32; let q = (l - n * 2) as f32; - let r = 100f32 - (h * h + q * q).sqrt() * g; + let r = h.hypot(q).mul_add(-g, 100f32); let s = r.clamp(-100f32, 80f32); f = f.max(s); diff --git a/pumpkin-world/src/world_gen/noise/density/noise.rs b/pumpkin-world/src/world_gen/noise/density/noise.rs index d4406bb2..71843e58 100644 --- a/pumpkin-world/src/world_gen/noise/density/noise.rs +++ b/pumpkin-world/src/world_gen/noise/density/noise.rs @@ -118,9 +118,9 @@ impl<'a> ShiftedNoiseFunction<'a> { impl<'a> DensityFunctionImpl<'a> for ShiftedNoiseFunction<'a> { fn sample(&self, pos: &NoisePos) -> f64 { - let d = pos.x() as f64 * self.xz_scale + self.shift_x.sample(pos); - let e = pos.y() as f64 * self.y_scale + self.shift_y.sample(pos); - let f = pos.z() as f64 * self.xz_scale + self.shift_z.sample(pos); + let d = (pos.x() as f64).mul_add(self.xz_scale, self.shift_x.sample(pos)); + let e = (pos.y() as f64).mul_add(self.y_scale, self.shift_y.sample(pos)); + let f = (pos.z() as f64).mul_add(self.xz_scale, self.shift_z.sample(pos)); self.noise.sample(d, e, f) } diff --git a/pumpkin-world/src/world_gen/noise/density/spline.rs b/pumpkin-world/src/world_gen/noise/density/spline.rs index c2480d7c..aebffcfa 100644 --- a/pumpkin-world/src/world_gen/noise/density/spline.rs +++ b/pumpkin-world/src/world_gen/noise/density/spline.rs @@ -67,7 +67,7 @@ impl<'a> Spline<'a> { if f == 0f32 { value } else { - value + f * (point - points[i].location) + f.mul_add(point - points[i].location, value) } } @@ -154,8 +154,8 @@ impl<'a> Spline<'a> { let ad = z.min(ab); let ae = aa.max(ac); - f = f.min(x + 0.25f32 * ad); - g = g.max(y + 0.25f32 * ae); + f = f.min(0.25f32.mul_add(ad, x)); + g = g.max(0.25f32.mul_add(ae, y)); } } @@ -189,9 +189,12 @@ impl<'a> Spline<'a> { let n = point_1.value.apply(pos); let o = point_2.value.apply(pos); - let p = point_1.derivative * (point_2.location - point_1.location) - (o - n); - let q = -point_2.derivative * (point_2.location - point_1.location) + (o - n); - lerp(k, n, o) + k * (1f32 - k) * lerp(k, p, q) + let p = point_1 + .derivative + .mul_add(point_2.location - point_1.location, -(o - n)); + let q = + (-point_2.derivative).mul_add(point_2.location - point_1.location, o - n); + (k * (1f32 - k)).mul_add(lerp(k, p, q), lerp(k, n, o)) } } Range::Below => { diff --git a/pumpkin-world/src/world_gen/noise/density/terrain_helpers.rs b/pumpkin-world/src/world_gen/noise/density/terrain_helpers.rs index 5e1fdec4..2d478fb9 100644 --- a/pumpkin-world/src/world_gen/noise/density/terrain_helpers.rs +++ b/pumpkin-world/src/world_gen/noise/density/terrain_helpers.rs @@ -10,11 +10,11 @@ use crate::world_gen::noise::lerp; #[inline] fn get_offset_value(f: f32, g: f32, h: f32) -> f32 { - let k = 1f32 - (1f32 - g) * 0.5f32; + let k = (1f32 - g).mul_add(-0.5f32, 1f32); let l = 0.5f32 * (1f32 - g); let m = (f + 1.17f32) * 0.46082947f32; - let n = m * k - l; + let n = m.mul_add(k, -l); if f < h { n.max(-0.2222f32) @@ -25,7 +25,7 @@ fn get_offset_value(f: f32, g: f32, h: f32) -> f32 { #[inline] fn skew_map(f: f32) -> f32 { - let k = 1f32 - (1f32 - f) * 0.5f32; + let k = (1f32 - f).mul_add(-0.5f32, 1f32); let l = 0.5f32 * (1f32 - f); l / (0.46082947f32 * k) - 1.17f32 diff --git a/pumpkin-world/src/world_gen/noise/mod.rs b/pumpkin-world/src/world_gen/noise/mod.rs index 0f9de83f..72d00920 100644 --- a/pumpkin-world/src/world_gen/noise/mod.rs +++ b/pumpkin-world/src/world_gen/noise/mod.rs @@ -265,5 +265,5 @@ const GRADIENTS: [Gradient; 16] = [ ]; fn dot(gradient: &Gradient, x: f64, y: f64, z: f64) -> f64 { - gradient.x as f64 * x + gradient.y as f64 * y + gradient.z as f64 * z + (gradient.z as f64).mul_add(z, (gradient.x as f64).mul_add(x, gradient.y as f64 * y)) } diff --git a/pumpkin-world/src/world_gen/noise/perlin.rs b/pumpkin-world/src/world_gen/noise/perlin.rs index 74dcec72..7d952783 100644 --- a/pumpkin-world/src/world_gen/noise/perlin.rs +++ b/pumpkin-world/src/world_gen/noise/perlin.rs @@ -183,7 +183,9 @@ impl OctavePerlinNoiseSampler { } pub fn maintain_precision(value: f64) -> f64 { - value - (value / 3.3554432E7f64 + 0.5f64).floor() * 3.3554432E7f64 + (value / 3.3554432E7f64 + 0.5f64) + .floor() + .mul_add(-3.3554432E7f64, value) } pub fn calculate_amplitudes(octaves: &[i32]) -> (i32, Vec) { diff --git a/pumpkin-world/src/world_gen/noise/simplex.rs b/pumpkin-world/src/world_gen/noise/simplex.rs index fc4d9153..8310bc65 100644 --- a/pumpkin-world/src/world_gen/noise/simplex.rs +++ b/pumpkin-world/src/world_gen/noise/simplex.rs @@ -71,8 +71,8 @@ impl SimplexNoiseSampler { let n = h - l as f64 + Self::UNSKEW_FACTOR_2D; let o = k - m as f64 + Self::UNSKEW_FACTOR_2D; - let p = h - 1f64 + 2f64 * Self::UNSKEW_FACTOR_2D; - let q = k - 1f64 + 2f64 * Self::UNSKEW_FACTOR_2D; + let p = 2f64.mul_add(Self::UNSKEW_FACTOR_2D, h - 1f64); + let q = 2f64.mul_add(Self::UNSKEW_FACTOR_2D, k - 1f64); let r = i & 0xFF; let s = j & 0xFF; @@ -236,8 +236,8 @@ impl OctaveSimplexNoiseSampler { for sampler in self.octave_samplers.iter() { if let Some(sampler) = sampler { d += sampler.sample_2d( - x * e + if use_origin { sampler.x_origin } else { 0f64 }, - y * e + if use_origin { sampler.y_origin } else { 0f64 }, + x.mul_add(e, if use_origin { sampler.x_origin } else { 0f64 }), + y.mul_add(e, if use_origin { sampler.y_origin } else { 0f64 }), ) * f; }