Transpose and untranspose VecUnroll{N,1,T,T}s

JuliaSIMD · Jul 26, 2021 · e3a4917 · e3a4917 · chriselrod · Jul 26, 2021
1 parent 4571229
commit e3a4917
Show file tree

Hide file tree

Showing 5 changed files with 45 additions and 23 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "SLEEFPirates"
 uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
 authors = ["chriselrod <[email protected]>"]
-version = "0.6.22"
+version = "0.6.23"
 
 [deps]
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"

diff --git a/src/SLEEFPirates.jl b/src/SLEEFPirates.jl
@@ -128,16 +128,24 @@ include("misc.jl")   # miscellaneous math functions including pow and cbrt
 
 # fallback definitions
 
+@generated function to_vecunrollscalar(v::Vec{W,T}, ::StaticInt{N}) where {N,W,T} # generates code that wraps elements 0..N of `v` in a VecUnroll
+  t = Expr(:tuple) # tuple expression that collects one extractelement call per index
+  for n ∈ 0:N # N+1 iterations: indices 0 through N inclusive
+    push!(t.args, :(VectorizationBase.extractelement(v, $n)))
+  end
+  Expr(:block, Expr(:meta,:inline), :(VecUnroll($t))) # returned body: inline meta followed by VecUnroll((elem_0, ..., elem_N))
+end
 for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh,
              :asinh, :acosh, :atanh, :log, :log2, :log10, :log1p, :expm1, :cbrt,
              :sin_fast, :cos_fast, :tan_fast, :asin_fast, :acos_fast, :atan_fast,# :atan2_fast,
              :log_fast, :log2_fast, :log10_fast, :cbrt_fast)#, :exp, :exp2, :exp10
-    @eval begin
-        $func(a::Float16) = Float16.($func(Float32(a)))
-        $func(x::Real) = $func(float(x))
-        @inline $func(v::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v))
-        @inline $func(i::MM) = $func(Vec(i))
-    end
+  @eval begin # for each listed function name, define the fallback methods below
+    $func(a::Float16) = Float16.($func(Float32(a))) # Float16: evaluate on Float32(a), convert the result back to Float16
+    $func(x::Real) = $func(float(x)) # other Real inputs: convert with float first
+    @inline $func(v::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v)) # integer-element SIMD inputs: convert with float first
+    @inline $func(i::MM) = $func(Vec(i)) # MM inputs: convert to Vec first
+    @inline $func(v::VecUnroll{N,1,T,T}) where {N,T} = to_vecunrollscalar($func(VectorizationBase.transpose_vecunroll(v)), StaticInt{N}()) # VecUnroll{N,1,T,T}: transpose_vecunroll, apply func, convert back with to_vecunrollscalar
+  end
 end
 # Tπ(::Type{T}) where {T} = promote_type(T, typeof(π))(π)
 for func ∈ (:sin, :cos)
@@ -157,13 +165,17 @@ end
 @inline sincospi_fast(v::Vec{W,T}) where {W,T} = sincos_fast(T(π) * v)
 
 for func in (:sinh, :cosh, :tanh, :asinh, :acosh, :atanh, :log1p, :expm1)#, :exp, :exp2, :exp10
-    @eval @inline Base.$func(x::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64,Int32,UInt32,Int64,UInt64}} = $func(x)
-    @eval @inline Base.$func(x::MM) = $func(Vec(x))
+  @eval begin # one @eval block per function name instead of one @eval per method
+    @inline Base.$func(x::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64,Int32,UInt32,Int64,UInt64}} = $func(x) # Base method forwards to the unqualified $func for the listed element types
+    @inline Base.$func(x::MM) = $func(Vec(x)) # MM inputs: convert to Vec, then call the unqualified $func
+  end
 end
 for func ∈ (:sin, :cos, :tan, :asin, :acos, :atan, :log, :log2, :log10, :cbrt, :sincos)
   func_fast = Symbol(func, :_fast)
-  @eval @inline Base.$func(x::AbstractSIMD) = $func_fast(float(x))
-  @eval @inline Base.FastMath.$func_fast(x::AbstractSIMD) = $func_fast(float(x))
+  @eval begin # one @eval block per function name instead of one @eval per method
+    @inline Base.$func(x::AbstractSIMD) = $func_fast(float(x)) # Base.$func on SIMD inputs calls the unqualified `<func>_fast` on float(x)
+    @inline Base.FastMath.$func_fast(x::AbstractSIMD) = $func_fast(float(x)) # Base.FastMath `<func>_fast` calls the same unqualified `<func>_fast` on float(x)
+  end
 end
 @inline Base.FastMath.atan_fast(a::T, b::Number) where {T<:AbstractSIMD} = atan_fast(a, T(b))
 @inline Base.FastMath.atan_fast(a::Number, b::T) where {T<:AbstractSIMD} = atan_fast(T(a), b)
@@ -197,11 +209,11 @@ max_tanh(::Type{Float64}) = 19.0615474653984959950960955322853986741878634050481
 max_tanh(::Type{Float32}) = 9.010913339828708369989037671244720498805572920317272822795576296065428827978905f0
 
 @inline function tanh_fast(x)
-    exp2xm1 = expm1_fast(Base.FastMath.add_fast(x, x))
-    # Division is faster than approximate inversion in
-    # t = Base.FastMath.mul_fast(exp2xm1, Base.FastMath.inv_fast(Base.FastMath.add_fast(exp2xm1, typeof(x)(2))))
-    t = Base.FastMath.div_fast(exp2xm1, Base.FastMath.add_fast(exp2xm1, typeof(x)(2)))
-    ifelse(abs(x) > max_tanh(eltype(x)), copysign(one(x), x), t)
+  exp2xm1 = expm1_fast(Base.FastMath.add_fast(x, x)) # expm1_fast applied to x + x
+  # Division is faster than approximate inversion in
+  # t = Base.FastMath.mul_fast(exp2xm1, Base.FastMath.inv_fast(Base.FastMath.add_fast(exp2xm1, typeof(x)(2))))
+  t = Base.FastMath.div_fast(exp2xm1, Base.FastMath.add_fast(exp2xm1, typeof(x)(2))) # t = exp2xm1 / (exp2xm1 + 2)
+  ifelse(abs(x) > max_tanh(eltype(x)), copysign(one(x), x), t) # where abs(x) exceeds max_tanh(eltype(x)), select copysign(one(x), x) instead of t
 end
 @inline Base.FastMath.tanh_fast(x::AbstractSIMD) = tanh_fast(x)
 # sigmoid_max(::Type{Float64}) = 36.42994775023704665301938332748370611415146834112402863375388447785857586583462

diff --git a/src/log.jl b/src/log.jl
@@ -304,11 +304,11 @@ end
 @inline log_fast(d::Union{Float32,Float64}) = log_fast(Val{ℯ}(), d, False())
 @inline log2_fast(d::Union{Float32,Float64}) = log_fast(Val{2}(), d, False())
 @inline log10_fast(d::Union{Float32,Float64}) = log_fast(Val{10}(), d, False())
-@generated function log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T}) where {N,T,BASE}
-    quote
-        $(Expr(:meta,:inline))
-        lx = log_fast(Val{$BASE}(), VectorizationBase.transpose_vecunroll(x))
-        VecUnroll(Base.Cartesian.@ntuple $(N+1) n -> lx(n))
-    end
-end
+# @generated function log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T}) where {N,T,BASE}
+#     quote
+#         $(Expr(:meta,:inline))
+#         lx = log_fast(Val{$BASE}(), VectorizationBase.transpose_vecunroll(x))
+#         VecUnroll(Base.Cartesian.@ntuple $(N+1) n -> lx(n))
+#     end
+# end
 
diff --git a/test/accuracy.jl b/test/accuracy.jl
@@ -135,6 +135,14 @@
     tol = 1
     test_acc(T, fun_table, txx, tol)
 
+    xx1 = map(Tuple{T,T}, [(x,y) for x = 0:0.20:100, y = 0.1:0.20:100])[:]; # (x, y) input pairs on a grid, flattened with [:]
+    xx2 = map(Tuple{T,T}, [(x,y) for x = 0:0.21:100, y = 0.1:0.22:100])[:]; # second grid with different step sizes, flattened with [:]
+    xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]); # fixed x = 2.1 with y swept over -1000:0.1:1000
+    txx = vcat(xx1, xx2, xx2); # NOTE(review): xx3 is built above but never used and xx2 is passed twice — confirm whether xx3 was intended here
+    fun_table = Dict(SLEEFPirates.pow_fast => Base.:^); # pairs pow_fast with Base.:^ for test_acc
+    tol = 10
+    test_acc(T, fun_table, txx, tol)
+
 
     xx = map(T, vcat(prevfloat(0.0):0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:957)));
     fun_table = Dict(SLEEFPirates.cbrt_fast => Base.cbrt)

diff --git a/test/testsetup.jl b/test/testsetup.jl
@@ -233,3 +233,5 @@ function test_acc(T, fun_table, xx, tol; debug = false, tol_debug = 5)
     end
 end
 
+
+
Original file line number	Diff line number	Diff line change
Expand Up		@@ -233,3 +233,5 @@ function test_acc(T, fun_table, xx, tol; debug = false, tol_debug = 5)
		end
		end