From 99b3de398c1cc5653fa0976b39a1d0e3dc15ac3c Mon Sep 17 00:00:00 2001 From: Thomas Dubos Date: Tue, 7 Nov 2023 16:27:49 +0100 Subject: [PATCH 1/2] Fix (x::Vec)^n for n>3 --- src/LLVM_intrinsics.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LLVM_intrinsics.jl b/src/LLVM_intrinsics.jl index 3c9ecd5..b24198e 100644 --- a/src/LLVM_intrinsics.jl +++ b/src/LLVM_intrinsics.jl @@ -293,7 +293,7 @@ end # pow, powi -for (f, c) in [(:pow, FloatingTypes), (:powi, Union{Int32,UInt32})] +for (f, c) in [(:pow, FloatingTypes), (:powi, Union{Int32,Int64,UInt32,UInt64})] @eval @generated function $(f)(x::T, y::T2) where {T <: LT{<:FloatingTypes}, T2 <: $c} ff = llvm_name($(QuoteNode(f)), T) * "." * suffix(T2) return :( From 066c91eb9430cddd58ce9adb895a81a4115b45e8 Mon Sep 17 00:00:00 2001 From: Thomas Dubos Date: Wed, 8 Nov 2023 00:00:31 +0100 Subject: [PATCH 2/2] Implement literal_pow for Vec --- src/LLVM_intrinsics.jl | 2 +- src/simdvec.jl | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/LLVM_intrinsics.jl b/src/LLVM_intrinsics.jl index b24198e..3c9ecd5 100644 --- a/src/LLVM_intrinsics.jl +++ b/src/LLVM_intrinsics.jl @@ -293,7 +293,7 @@ end # pow, powi -for (f, c) in [(:pow, FloatingTypes), (:powi, Union{Int32,Int64,UInt32,UInt64})] +for (f, c) in [(:pow, FloatingTypes), (:powi, Union{Int32,UInt32})] @eval @generated function $(f)(x::T, y::T2) where {T <: LT{<:FloatingTypes}, T2 <: $c} ff = llvm_name($(QuoteNode(f)), T) * "." * suffix(T2) return :( diff --git a/src/simdvec.jl b/src/simdvec.jl index 96dd9a1..f78ef38 100644 --- a/src/simdvec.jl +++ b/src/simdvec.jl @@ -290,7 +290,17 @@ end @inline Base.literal_pow(::typeof(^), x::Vec, ::Val{0}) = one(typeof(x)) @inline Base.literal_pow(::typeof(^), x::Vec, ::Val{1}) = x @inline Base.literal_pow(::typeof(^), x::Vec, ::Val{2}) = x*x -@inline Base.literal_pow(::typeof(^), x::Vec, ::Val{3}) = x*x*x +@inline function Base.literal_pow(::typeof(^), x::Vec, ::Val{N}) where N + M = div(N,2) + N<0 && return inv(Base.literal_pow(^, x, Val(-N))) + N<256 && return Base.literal_pow(^, x, Val(M))*Base.literal_pow(^, x, Val(N-M)) + y, n, xn = one(x), 1, x + while n<=N + (n&N)==0 || (y = y*xn) + n, xn = 2n, xn*xn + end + y +end # Sign @inline Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T} =