better integer support

JuliaSIMD · Sep 30, 2022 · 4a5c5ac · 4a5c5ac · chriselrod · Sep 30, 2022
1 parent e838c43
commit 4a5c5ac
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 22 deletions.
diff --git a/src/SLEEFPirates.jl b/src/SLEEFPirates.jl
@@ -163,8 +163,8 @@ for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh,
              :sin_fast, :cos_fast, :tan_fast, :asin_fast, :acos_fast, :atan_fast,# :atan2_fast,
              :log_fast, :log2_fast, :log10_fast, :cbrt_fast)#, :exp, :exp2, :exp10
   @eval begin
-    $func(a::Float16) = Float16.($func(Float32(a)))
-    $func(x::Real) = $func(float(x))
+    @inline $func(a::Float16) = Float16.($func(Float32(a)))
+    @inline $func(x::Real) = $func(float(x))
     @inline $func(v::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v))
     @inline $func(i::MM) = $func(Vec(i))
     @inline $func(v::VecUnroll{N,1,T,T}) where {N,T} = to_vecunrollscalar($func(VectorizationBase.transpose_vecunroll(v)), StaticInt{N}())
@@ -205,22 +205,22 @@ end
 @inline Base.FastMath.atan_fast(a::T, b::T) where {T<:AbstractSIMD} = atan_fast(a, b)
 @inline Base.FastMath.atan_fast(a::AbstractSIMD, b::AbstractSIMD) = ((c,d) = promote(a,b); atan_fast(c, d))
 for func in (:atan, :hypot, :pow)
-    func2 = func === :pow ? :^ : func
-    ptyp = func === :pow ? :FloatingTypes : :NativeTypes
-    @eval begin
-        $func(y::Real, x::Real) = $func(promote(float(y), float(x))...)
-        $func(a::Float16, b::Float16) = Float16($func(Float32(a), Float32(b)))
-        # @inline Base.$func2(x::AbstractSIMD{W,T}, y::Vec{W,T}) where {W,T<:Union{Float32,Float64}} = $func(x, Vec(y))
-        # @inline Base.$func2(x::Vec{W,T}, y::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64}} = $func(Vec(x), y)
-        @inline Base.$func2(x::AbstractSIMD{W,T}, y::T) where {W,T<:Union{Float32,Float64}} = $func(x, convert(Vec{W,T}, y))
-        @inline Base.$func2(x::T, y::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64}} = $func(convert(Vec{W,T}, x), y)
-        @inline Base.$func2(x::AbstractSIMD{W,T1}, y::T2) where {W,T1<:Union{Float32,Float64},T2<:$ptyp} = $func(x, convert(Vec{W,T1}, y))
-        @inline Base.$func2(x::T2, y::AbstractSIMD{W,T1}) where {W,T1<:Union{Float32,Float64},T2<:NativeTypes} = $func(convert(Vec{W,T1}, x), y)
-        @inline Base.$func2(x::AbstractSIMD{W,T}, y::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64}} = $func(x, y)
-        @inline $func(v1::AbstractSIMD{W,I}, v2::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v1), float(v2))
-    end
+  func2 = func === :pow ? :^ : func
+  ptyp = func === :pow ? :FloatingTypes : :NativeTypes
+  @eval begin
+    @inline $func(y::Real, x::Real) = $func(promote(float(y), float(x))...)
+    @inline $func(a::Float16, b::Float16) = Float16($func(Float32(a), Float32(b)))
+    # @inline Base.$func2(x::AbstractSIMD{W,T}, y::Vec{W,T}) where {W,T<:Union{Float32,Float64}} = $func(x, Vec(y))
+    # @inline Base.$func2(x::Vec{W,T}, y::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64}} = $func(Vec(x), y)
+    @inline Base.$func2(x::AbstractSIMD{W,T}, y::T) where {W,T<:Union{Float32,Float64}} = $func(x, convert(Vec{W,T}, y))
+    @inline Base.$func2(x::T, y::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64}} = $func(convert(Vec{W,T}, x), y)
+    @inline Base.$func2(x::AbstractSIMD{W,T1}, y::T2) where {W,T1<:Union{Float32,Float64},T2<:$ptyp} = $func(x, convert(Vec{W,T1}, y))
+    @inline Base.$func2(x::T2, y::AbstractSIMD{W,T1}) where {W,T1<:Union{Float32,Float64},T2<:NativeTypes} = $func(convert(Vec{W,T1}, x), y)
+    @inline Base.$func2(x::AbstractSIMD{W,T}, y::AbstractSIMD{W,T}) where {W,T<:Union{Float32,Float64}} = $func(x, y)
+    @inline $func(v1::AbstractSIMD{W,I}, v2::AbstractSIMD{W,I}) where {W,I<:Integer} = $func(float(v1), float(v2))
+  end
 end
-ldexp(x::Float16, q::Int) = Float16(ldexpk(Float32(x), q))
+@inline ldexp(x::Float16, q::Int) = Float16(ldexpk(Float32(x), q))  # Float16: widen to Float32, call ldexpk, narrow the result back to Float16
 
 # @inline logit(x) = log(Base.FastMath.div_fast(x,Base.FastMath.sub_fast(one(x),x)))
 # @inline invlogit(x) = Base.FastMath.inv_fast(Base.FastMath.add_fast(one(x), exp(Base.FastMath.sub_fast(x))))
@@ -231,7 +231,7 @@ ldexp(x::Float16, q::Int) = Float16(ldexpk(Float32(x), q))
 max_tanh(::Type{Float64}) = 19.06154746539849599509609553228539867418786340504817671278462587964799037885145
 max_tanh(::Type{Float32}) = 9.010913339828708369989037671244720498805572920317272822795576296065428827978905f0
 
-@inline function tanh_fast(x::AbstractSIMD{W,Float32}) where {W}
+@inline function tanh_fast(x::Union{Float32,AbstractSIMD{<:Any,Float32}})
   # stolen from https://github.com/FluxML/NNlib.jl/pull/345
   # https://github.com/FluxML/NNlib.jl/blob/5dd04df4e95f9f9b70d6232fac546f3e98899fc2/src/activations.jl#L766-L773
   x2 = abs2(x)
@@ -245,7 +245,7 @@ max_tanh(::Type{Float32}) = 9.01091333982870836998903767124472049880557292031727
   d = muladd(d2, x2, 1.0f0)
   ifelse(x2 < 66f0, @fastmath(x * (n / d)), sign(x))
 end
-@inline function tanh_fast(x::AbstractSIMD{W,Float64}) where {W}
+@inline function tanh_fast(x::Union{Float64,AbstractSIMD{<:Any,Float64}})
   exp2xm1 = expm1_fast(Base.FastMath.add_fast(x, x))
   # Division is faster than approximate inversion in
   # t = Base.FastMath.mul_fast(exp2xm1, Base.FastMath.inv_fast(Base.FastMath.add_fast(exp2xm1, typeof(x)(2))))
@@ -254,6 +254,12 @@ end
 end
 @inline tanh_fast(x::IntegerType) = tanh_fast(float(x))
 @inline Base.FastMath.tanh_fast(x::AbstractSIMD) = tanh_fast(x)
+@inline function Base.:(^)(  # `^` for two integer-element SIMD values sharing the same width W
+  x::AbstractSIMD{W,<:Base.BitInteger},
+  y::AbstractSIMD{W,<:Base.BitInteger}
+) where {W}
+  float(x) ^ y  # only the base is converted with float(); the integer exponent y is forwarded unchanged
+end
 # sigmoid_max(::Type{Float64}) = 36.42994775023704665301938332748370611415146834112402863375388447785857586583462
 # sigmoid_max(::Type{Float32}) = 17.3286794841963099036462718631317335849086302638474573162299687307067828965093f0
 

diff --git a/src/trig.jl b/src/trig.jl
@@ -191,7 +191,6 @@ end
     c1 = -0.166666597127914428710938f0
     return evalpoly(x, (c1, c2, c3, c4))
 end
-
 @inline function sin_fast(d::FloatType64)
   T = eltype(d)
   I = fpinttype(T)
@@ -234,7 +233,8 @@ end
 
   return u
 end
-
+@inline sincos(x::IntegerType) = sincos(float(x))  # integer input: convert with float() and re-dispatch to sincos
+@inline sincos_fast(x::IntegerType) = sincos_fast(float(x))  # same float() conversion, forwarded to sincos_fast
 @inline function sin_fast(d::FloatType32)
     T = eltype(d)
     I = fpinttype(T)
@@ -904,7 +904,7 @@ Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to det
     # return ifelse(isnan(y) | isnan(x), T(NaN), flipsign(r, x))
     flipsign(r, x)
 end
-
+@inline atan_fast(a, b) = atan_fast(promote(float(a), float(b))...)  # generic fallback: float() each argument, then promote to one common type so mixed inputs (e.g. Float32/Float64, Int32/Float32) reach a same-type method instead of re-entering this fallback
 
 
 """

diff --git a/test/testsetup.jl b/test/testsetup.jl
@@ -145,6 +145,15 @@ function test_vector(xfun, fun, ::Union{Val{W},SLEEFPirates.VectorizationBase.St
     @test maximum(countulp.(t1, t2)) ≤ tol
     @test maximum(countulp.(tu1, tu2)) ≤ tol
   end
+  vui = round(SLEEFPirates.inttype(T), vu);
+  tu3 = tovector(xfun(vui));
+  tu4 = tovector(xfun(float(vui)));
+  @test maximum(countulp.(tu3, tu4)) ≤ tol
+  vxi = round(SLEEFPirates.inttype(T), vxes1);
+  tx3 = tovector(xfun(vxi));
+  tx4 = tovector(xfun(float(vxi)));
+  @test maximum(countulp.(tx3, tx4)) ≤ tol
+  nothing
 end
 vbig(x) = big.(x)
 function test_vector(xfun, fun, ::Union{Val{W},SLEEFPirates.VectorizationBase.StaticInt{W}}, xf::NTuple{N,T}, xl::NTuple{N,T}, tol, broken::Bool) where {W,N,T}
@@ -190,6 +199,15 @@ function test_vector(xfun, fun, ::Union{Val{W},SLEEFPirates.VectorizationBase.St
   else
     @test_broken maximum(countulp.(tu1, tu2)) ≤ tol
   end
+  vui = map(Base.Fix1(round,SLEEFPirates.inttype(T)), vu);
+  tu3 = tovector(xfun(vui...));
+  tu4 = tovector(xfun(map(float,vui)...));
+  @test maximum(countulp.(tu3, tu4)) ≤ tol
+  vxi = map(Base.Fix1(round,SLEEFPirates.inttype(T)), vxes1);
+  tx3 = tovector(xfun(vxi...));
+  tx4 = tovector(xfun(map(float,vxi)...));
+  @test maximum(countulp.(tx3, tx4)) ≤ tol
+  nothing
 end
 function test_function_acc(::Type{T}, xfun::F1, fun::F2, xx, tol, debug, tol_debug, broken) where {T,F1,F2}
   rmax = 0.0