Skip to content

Commit

Permalink
Transpose and untranspose VecUnroll{N,1,T,T}s
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Jul 26, 2021
1 parent 4571229 commit e3a4917
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SLEEFPirates"
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
authors = ["chriselrod <[email protected]>"]
version = "0.6.22"
version = "0.6.23"

[deps]
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
Expand Down
42 changes: 27 additions & 15 deletions src/SLEEFPirates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,24 @@ include("misc.jl") # miscellaneous math functions including pow and cbrt

# fallback definitions

# Build a `VecUnroll` from the individual lanes of `v`.
#
# The generated body is a tuple of `N + 1` calls to
# `VectorizationBase.extractelement(v, n)` for `n = 0, 1, ..., N`, wrapped in
# `VecUnroll(...)` and marked `:inline`. `N` comes from the `StaticInt{N}`
# argument, so the tuple length is fixed when the method is generated.
@generated function to_vecunrollscalar(v::Vec{W,T}, ::StaticInt{N}) where {N,W,T}
    t = Expr(:tuple)
    for n in 0:N
        push!(t.args, :(VectorizationBase.extractelement(v, $n)))
    end
    Expr(:block, Expr(:meta,:inline), :(VecUnroll($t)))
end
# Fallback methods generated for every function named in the tuple below:
#   * `Float16` arguments are evaluated in `Float32` and converted back;
#   * other `Real` arguments are first converted with `float`;
#   * integer-element SIMD values are converted with `float`;
#   * `MM` values are converted to `Vec` first;
#   * `VecUnroll{N,1,T,T}` values go through
#     `VectorizationBase.transpose_vecunroll`, the function is applied to the
#     result, and `to_vecunrollscalar` turns it back into a `VecUnroll`.
# Each method is defined exactly once per function so nothing is overwritten.
for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh,
             :asinh, :acosh, :atanh, :log, :log2, :log10, :log1p, :expm1, :cbrt,
             :sin_fast, :cos_fast, :tan_fast, :asin_fast, :acos_fast, :atan_fast,# :atan2_fast,
             :log_fast, :log2_fast, :log10_fast, :cbrt_fast)#, :exp, :exp2, :exp10
    @eval begin
        $func(a::Float16) = Float16.($func(Float32(a)))
        $func(x::Real) = $func(float(x))
        @inline $func(v::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v))
        @inline $func(i::MM) = $func(Vec(i))
        @inline $func(v::VecUnroll{N,1,T,T}) where {N,T} = to_vecunrollscalar($func(VectorizationBase.transpose_vecunroll(v)), StaticInt{N}())
    end
end
# Tπ(::Type{T}) where {T} = promote_type(T, typeof(π))(π)
for func (:sin, :cos)
Expand All @@ -157,13 +165,17 @@ end
# `sincospi_fast` for SIMD vectors: multiply the argument by π (converted to
# the element type `T`) and hand the product to `sincos_fast`.
@inline function sincospi_fast(v::Vec{W,T}) where {W,T}
    scaled = T(π) * v
    return sincos_fast(scaled)
end

# Extend the `Base` versions of the listed functions so that SIMD arguments
# are forwarded to this module's function of the same name:
#   * `AbstractSIMD{W,T}` with `T` one of Float32/Float64/Int32/UInt32/Int64/UInt64
#     is passed straight through;
#   * `MM` arguments are converted to `Vec` first.
# Each method is emitted once per function.
for func in (:sinh, :cosh, :tanh, :asinh, :acosh, :atanh, :log1p, :expm1)#, :exp, :exp2, :exp10
    @eval begin
        @inline Base.$func(x::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64,Int32,UInt32,Int64,UInt64}} = $func(x)
        @inline Base.$func(x::MM) = $func(Vec(x))
    end
end
# For each listed function `f`, route both `Base.f` and `Base.FastMath.f_fast`
# on SIMD arguments to this module's `f_fast`, after converting the argument
# with `float`. Each method is emitted once per function.
for func in (:sin, :cos, :tan, :asin, :acos, :atan, :log, :log2, :log10, :cbrt, :sincos)
    func_fast = Symbol(func, :_fast)
    @eval begin
        @inline Base.$func(x::AbstractSIMD) = $func_fast(float(x))
        @inline Base.FastMath.$func_fast(x::AbstractSIMD) = $func_fast(float(x))
    end
end
# Two-argument `Base.FastMath.atan_fast` with one SIMD and one plain `Number`
# argument: the `Number` is converted to the SIMD type `T`, then this module's
# `atan_fast` is called with both arguments in their original order.
@inline function Base.FastMath.atan_fast(a::T, b::Number) where {T<:AbstractSIMD}
    return atan_fast(a, T(b))
end
@inline function Base.FastMath.atan_fast(a::Number, b::T) where {T<:AbstractSIMD}
    return atan_fast(T(a), b)
end
Expand Down Expand Up @@ -197,11 +209,11 @@ max_tanh(::Type{Float64}) = 19.0615474653984959950960955322853986741878634050481
# Float32 cutoff used by `tanh_fast`: when `abs(x)` exceeds this value,
# `tanh_fast` returns `copysign(one(x), x)` instead of the computed quotient.
max_tanh(::Type{Float32}) = 9.010913339828708369989037671244720498805572920317272822795576296065428827978905f0

# Fast hyperbolic tangent.
#
# Uses the identity tanh(x) = expm1(2x) / (expm1(2x) + 2), evaluated with
# fast-math add/div. Where `abs(x)` exceeds `max_tanh(eltype(x))` the result is
# replaced by `copysign(one(x), x)`, i.e. ±1 with the sign of `x`.
@inline function tanh_fast(x)
    exp2xm1 = expm1_fast(Base.FastMath.add_fast(x, x))
    # Division is faster than approximate inversion in
    # t = Base.FastMath.mul_fast(exp2xm1, Base.FastMath.inv_fast(Base.FastMath.add_fast(exp2xm1, typeof(x)(2))))
    t = Base.FastMath.div_fast(exp2xm1, Base.FastMath.add_fast(exp2xm1, typeof(x)(2)))
    ifelse(abs(x) > max_tanh(eltype(x)), copysign(one(x), x), t)
end
# Make `Base.FastMath.tanh_fast` on SIMD arguments use this module's `tanh_fast`.
@inline function Base.FastMath.tanh_fast(x::AbstractSIMD)
    return tanh_fast(x)
end
# sigmoid_max(::Type{Float64}) = 36.42994775023704665301938332748370611415146834112402863375388447785857586583462
Expand Down
14 changes: 7 additions & 7 deletions src/log.jl
Original file line number Diff line number Diff line change
Expand Up @@ -304,11 +304,11 @@ end
# Scalar entry points for the fast logarithms. Each one forwards to the
# three-argument `log_fast`, selecting the base through a `Val` type
# (ℯ, 2 or 10) and passing `False()` as the last argument.
@inline function log_fast(d::Union{Float32,Float64})
    return log_fast(Val{ℯ}(), d, False())
end
@inline function log2_fast(d::Union{Float32,Float64})
    return log_fast(Val{2}(), d, False())
end
@inline function log10_fast(d::Union{Float32,Float64})
    return log_fast(Val{10}(), d, False())
end
# Generated `log_fast` method for `VecUnroll{N,1,T,T}` arguments with the base
# given as `Val{BASE}`: the input is passed through
# `VectorizationBase.transpose_vecunroll`, `log_fast` is applied to the result,
# and a `VecUnroll` is rebuilt from `N + 1` entries `lx(1), ..., lx(N + 1)`.
# NOTE(review): `lx(n)` calls the result with an index; this assumes the
# returned value is callable that way — TODO confirm.
# NOTE(review): an identical commented-out copy of this definition follows in
# this view, so this may be the pre-change side of the diff — verify which one
# is meant to be active.
@generated function log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T}) where {N,T,BASE}
quote
# Request inlining of the generated body.
$(Expr(:meta,:inline))
lx = log_fast(Val{$BASE}(), VectorizationBase.transpose_vecunroll(x))
VecUnroll(Base.Cartesian.@ntuple $(N+1) n -> lx(n))
end
end
# @generated function log_fast(::Val{BASE}, x::VecUnroll{N,1,T,T}) where {N,T,BASE}
# quote
# $(Expr(:meta,:inline))
# lx = log_fast(Val{$BASE}(), VectorizationBase.transpose_vecunroll(x))
# VecUnroll(Base.Cartesian.@ntuple $(N+1) n -> lx(n))
# end
# end

8 changes: 8 additions & 0 deletions test/accuracy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,14 @@
tol = 1
test_acc(T, fun_table, txx, tol)

# Accuracy check of `SLEEFPirates.pow_fast` against `Base.:^` on (x, y) pairs
# converted to `Tuple{T,T}`.
# xx1 / xx2: two grids over x in 0..100 and y in 0.1..100 with different step
# sizes, flattened to vectors with `[:]`.
xx1 = map(Tuple{T,T}, [(x,y) for x = 0:0.20:100, y = 0.1:0.20:100])[:];
xx2 = map(Tuple{T,T}, [(x,y) for x = 0:0.21:100, y = 0.1:0.22:100])[:];
# xx3: fixed x = 2.1 with y sweeping -1000:0.1:1000.
xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]);
# NOTE(review): `xx3` is built above but never used here, and `xx2` is
# concatenated twice — confirm whether `vcat(xx1, xx2, xx3)` was intended.
txx = vcat(xx1, xx2, xx2);
fun_table = Dict(SLEEFPirates.pow_fast => Base.:^);
# Tolerance passed to `test_acc` for this table.
tol = 10
test_acc(T, fun_table, txx, tol)


xx = map(T, vcat(prevfloat(0.0):0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:957)));
fun_table = Dict(SLEEFPirates.cbrt_fast => Base.cbrt)
Expand Down
2 changes: 2 additions & 0 deletions test/testsetup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,5 @@ function test_acc(T, fun_table, xx, tol; debug = false, tol_debug = 5)
end
end



2 comments on commit e3a4917

@chriselrod
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/41558

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.23 -m "<description of version>" e3a4917dea3deefc5bbfa9a3fc2dbf9d8a6dd897
git push origin v0.6.23

Please sign in to comment.