# Commentary only: these `#` lines sit ahead of the first `diff --git` header and
# belong to no hunk.
#
# NOTE(review): several file headers and hunks share one physical line below, so the
# original line breaks (and any indentation) appear to have been collapsed; the patch
# presumably has to be re-wrapped before a patch tool can apply it -- confirm.
#
# Changes carried by this patch, per file:
#   Project.toml        - version "0.6.22" -> "0.6.23".
#   src/SLEEFPirates.jl - adds the @generated helper
#                         `to_vecunrollscalar(v::Vec{W,T}, ::StaticInt{N})`, which emits
#                         `VecUnroll((extractelement(v, 0), ..., extractelement(v, N)))`
#                         with an inline meta expression.
#                       - adds, for every function in the fallback list, a method on
#                         `VecUnroll{N,1,T,T}` that evaluates the function on
#                         `VectorizationBase.transpose_vecunroll(v)` and converts the
#                         result back through `to_vecunrollscalar`.
#                       - merges pairs of single-line `@eval` definitions into
#                         `@eval begin ... end` blocks.
#                       - `tanh_fast`: the removed and added lines read identically here,
#                         so this is presumably an indentation-only change -- confirm.
#   src/log.jl          - comments out the @generated
#                         `log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T})` method.
#   test/accuracy.jl    - adds an accuracy check of `SLEEFPirates.pow_fast` against
#                         `Base.:^` with `tol = 10`.
#   test/testsetup.jl   - appends two lines with no visible content (presumably blank).
#
# NOTE(review): in test/accuracy.jl `xx3` is defined but never used in the added lines;
# `txx = vcat(xx1, xx2, xx2)` lists `xx2` twice. Confirm whether `xx3` was intended.
# NOTE(review): src/log.jl keeps the retired method as commented-out code; consider
# deleting it outright if it is not coming back.
diff --git a/Project.toml b/Project.toml index 07633d8..8fd87d8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SLEEFPirates" uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" authors = ["chriselrod "] -version = "0.6.22" +version = "0.6.23" [deps] IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" diff --git a/src/SLEEFPirates.jl b/src/SLEEFPirates.jl index 9ed596a..17d1cf8 100644 --- a/src/SLEEFPirates.jl +++ b/src/SLEEFPirates.jl @@ -128,16 +128,24 @@ include("misc.jl") # miscallenous math functions including pow and cbrt # fallback definitions +@generated function to_vecunrollscalar(v::Vec{W,T}, ::StaticInt{N}) where {N,W,T} + t = Expr(:tuple) + for n ∈ 0:N + push!(t.args, :(VectorizationBase.extractelement(v, $n))) + end + Expr(:block, Expr(:meta,:inline), :(VecUnroll($t))) +end for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh, :log, :log2, :log10, :log1p, :expm1, :cbrt, :sin_fast, :cos_fast, :tan_fast, :asin_fast, :acos_fast, :atan_fast,# :atan2_fast, :log_fast, :log2_fast, :log10_fast, :cbrt_fast)#, :exp, :exp2, :exp10 - @eval begin - $func(a::Float16) = Float16.($func(Float32(a))) - $func(x::Real) = $func(float(x)) - @inline $func(v::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v)) - @inline $func(i::MM) = $func(Vec(i)) - end + @eval begin + $func(a::Float16) = Float16.($func(Float32(a))) + $func(x::Real) = $func(float(x)) + @inline $func(v::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v)) + @inline $func(i::MM) = $func(Vec(i)) + @inline $func(v::VecUnroll{N,1,T,T}) where {N,T} = to_vecunrollscalar($func(VectorizationBase.transpose_vecunroll(v)), StaticInt{N}()) + end end # Tπ(::Type{T}) where {T} = promote_type(T, typeof(π))(π) for func ∈ (:sin, :cos) @@ -157,13 +165,17 @@ end @inline sincospi_fast(v::Vec{W,T}) where {W,T} = sincos_fast(T(π) * v) for func in (:sinh, :cosh, :tanh, :asinh, :acosh, :atanh, :log1p, :expm1)#, :exp, :exp2, :exp10 - @eval @inline 
Base.$func(x::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64,Int32,UInt32,Int64,UInt64}} = $func(x) - @eval @inline Base.$func(x::MM) = $func(Vec(x)) + @eval begin + @inline Base.$func(x::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64,Int32,UInt32,Int64,UInt64}} = $func(x) + @inline Base.$func(x::MM) = $func(Vec(x)) + end end for func ∈ (:sin, :cos, :tan, :asin, :acos, :atan, :log, :log2, :log10, :cbrt, :sincos) func_fast = Symbol(func, :_fast) - @eval @inline Base.$func(x::AbstractSIMD) = $func_fast(float(x)) - @eval @inline Base.FastMath.$func_fast(x::AbstractSIMD) = $func_fast(float(x)) + @eval begin + @inline Base.$func(x::AbstractSIMD) = $func_fast(float(x)) + @inline Base.FastMath.$func_fast(x::AbstractSIMD) = $func_fast(float(x)) + end end @inline Base.FastMath.atan_fast(a::T, b::Number) where {T<:AbstractSIMD} = atan_fast(a, T(b)) @inline Base.FastMath.atan_fast(a::Number, b::T) where {T<:AbstractSIMD} = atan_fast(T(a), b) @@ -197,11 +209,11 @@ max_tanh(::Type{Float64}) = 19.0615474653984959950960955322853986741878634050481 max_tanh(::Type{Float32}) = 9.010913339828708369989037671244720498805572920317272822795576296065428827978905f0 @inline function tanh_fast(x) - exp2xm1 = expm1_fast(Base.FastMath.add_fast(x, x)) - # Division is faster than approximate inversion in - # t = Base.FastMath.mul_fast(exp2xm1, Base.FastMath.inv_fast(Base.FastMath.add_fast(exp2xm1, typeof(x)(2)))) - t = Base.FastMath.div_fast(exp2xm1, Base.FastMath.add_fast(exp2xm1, typeof(x)(2))) - ifelse(abs(x) > max_tanh(eltype(x)), copysign(one(x), x), t) + exp2xm1 = expm1_fast(Base.FastMath.add_fast(x, x)) + # Division is faster than approximate inversion in + # t = Base.FastMath.mul_fast(exp2xm1, Base.FastMath.inv_fast(Base.FastMath.add_fast(exp2xm1, typeof(x)(2)))) + t = Base.FastMath.div_fast(exp2xm1, Base.FastMath.add_fast(exp2xm1, typeof(x)(2))) + ifelse(abs(x) > max_tanh(eltype(x)), copysign(one(x), x), t) end @inline Base.FastMath.tanh_fast(x::AbstractSIMD) = tanh_fast(x) 
# sigmoid_max(::Type{Float64}) = 36.42994775023704665301938332748370611415146834112402863375388447785857586583462 diff --git a/src/log.jl b/src/log.jl index 530809f..791fa95 100644 --- a/src/log.jl +++ b/src/log.jl @@ -304,11 +304,11 @@ end @inline log_fast(d::Union{Float32,Float64}) = log_fast(Val{ℯ}(), d, False()) @inline log2_fast(d::Union{Float32,Float64}) = log_fast(Val{2}(), d, False()) @inline log10_fast(d::Union{Float32,Float64}) = log_fast(Val{10}(), d, False()) -@generated function log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T}) where {N,T,BASE} - quote - $(Expr(:meta,:inline)) - lx = log_fast(Val{$BASE}(), VectorizationBase.transpose_vecunroll(x)) - VecUnroll(Base.Cartesian.@ntuple $(N+1) n -> lx(n)) - end -end +# @generated function log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T}) where {N,T,BASE} +# quote +# $(Expr(:meta,:inline)) +# lx = log_fast(Val{$BASE}(), VectorizationBase.transpose_vecunroll(x)) +# VecUnroll(Base.Cartesian.@ntuple $(N+1) n -> lx(n)) +# end +# end diff --git a/test/accuracy.jl b/test/accuracy.jl index 60c7c41..26e441a 100644 --- a/test/accuracy.jl +++ b/test/accuracy.jl @@ -135,6 +135,14 @@ tol = 1 test_acc(T, fun_table, txx, tol) + xx1 = map(Tuple{T,T}, [(x,y) for x = 0:0.20:100, y = 0.1:0.20:100])[:]; + xx2 = map(Tuple{T,T}, [(x,y) for x = 0:0.21:100, y = 0.1:0.22:100])[:]; + xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]); + txx = vcat(xx1, xx2, xx2); + fun_table = Dict(SLEEFPirates.pow_fast => Base.:^); + tol = 10 + test_acc(T, fun_table, txx, tol) + xx = map(T, vcat(prevfloat(0.0):0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:957))); fun_table = Dict(SLEEFPirates.cbrt_fast => Base.cbrt) diff --git a/test/testsetup.jl b/test/testsetup.jl index 7a4a892..d36606c 100644 --- a/test/testsetup.jl +++ b/test/testsetup.jl @@ -233,3 +233,5 @@ function test_acc(T, fun_table, xx, tol; debug = false, tol_debug = 5) end end + +