JuliaGPU · christiangnrd · Jan 29, 2025 · maleadt · Feb 3, 2025
diff --git a/src/device/intrinsics/math.jl b/src/device/intrinsics/math.jl
@@ -294,6 +294,11 @@ end
 @device_override Base.trunc(x::Float32) = ccall("extern air.trunc.f32", llvmcall, Cfloat, (Cfloat,), x)
 @device_override Base.trunc(x::Float16) = ccall("extern air.trunc.f16", llvmcall, Float16, (Float16,), x)
 
+@static if Metal.is_macos(v"14") # MSL nextafter is Metal 3.1+; @static decides once, at compile time, whether to define it
+    @device_function nextafter(x::Float32, y::Float32) = ccall("extern air.nextafter.f32", llvmcall, Cfloat, (Cfloat, Cfloat), x, y)
+    @device_function nextafter(x::Float16, y::Float16) = ccall("extern air.nextafter.f16", llvmcall, Float16, (Float16, Float16), x, y)
+end
+
 # hypot without use of double
 #
 # taken from Cosmopolitan Libc

diff --git a/test/device/intrinsics.jl b/test/device/intrinsics.jl
@@ -159,7 +159,6 @@ MATH_INTR_FUNCS_2_ARG = [
     # frexp, # T frexp(T x, Ti &exponent)
     # ldexp, # T ldexp(T x, Ti k)
     # modf, # T modf(T x, T &intval)
-    # nextafter, # T nextafter(T x, T y) # Metal 3.1+
     hypot, # NOT MSL but tested the same
 ]
 
@@ -353,6 +352,25 @@ end
         vec = Array(expm1.(buffer))
         @test vec ≈ expm1.(arr)
     end
+
+
+    let # nextafter: stepping up must match nextfloat, stepping back down must undo it
+        if Metal.is_macos(v"14")
+            nelems = 4
+            function nextafter_test(vals, toward)
+                tid = thread_position_in_grid_1d()
+                vals[tid] = Metal.nextafter(vals[tid], toward)
+                return nothing
+            end
+            host_vals = rand(T, nelems)
+            gpu_vals = MtlArray(host_vals)
+            Metal.@sync @metal threads = nelems nextafter_test(gpu_vals, typemax(T))
+            @test Array(gpu_vals) == nextfloat.(host_vals)
+
+            Metal.@sync @metal threads = nelems nextafter_test(gpu_vals, typemin(T))
+            @test Array(gpu_vals) == host_vals
+        end
+    end
 end
 end