From 266ddee58287f118731e020cba56960947ec09d7 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Sun, 23 May 2021 02:42:02 -0400 Subject: [PATCH] Less metaprogramming --- Project.toml | 5 +- src/SLEEFPirates.jl | 7 +-- src/estrin.jl | 138 +++++++++++++++++++++++++------------------- src/log.jl | 47 +++++++-------- src/misc.jl | 28 ++++----- src/priv.jl | 14 ++--- src/trig.jl | 16 ++--- 7 files changed, 137 insertions(+), 118 deletions(-) diff --git a/Project.toml b/Project.toml index 0293fa1..5ce71d9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,15 +1,16 @@ name = "SLEEFPirates" uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" authors = ["chriselrod "] -version = "0.6.19" +version = "0.6.20" [deps] IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [compat] IfElse = "0.1" +Static = "0.2" VectorizationBase = "0.19.37, 0.20" julia = "1.5" diff --git a/src/SLEEFPirates.jl b/src/SLEEFPirates.jl index ef85105..81cc033 100644 --- a/src/SLEEFPirates.jl +++ b/src/SLEEFPirates.jl @@ -3,12 +3,11 @@ module SLEEFPirates using Base: llvmcall using Base.Math: uinttype, exponent_bias, exponent_mask, significand_bits, IEEEFloat, exponent_raw_max -using Libdl, VectorizationBase +using VectorizationBase +using Static: True, False, One, lt, StaticInt using VectorizationBase: vzero, AbstractSIMD, _Vec, fma_fast, data, VecUnroll, NativeTypes, FloatingTypes, vIEEEFloat, - vfmadd, vfnmadd, vfmsub, vfnmsub, True, False, One, - Double, dadd, dadd2, dsub, dsub2, dmul, dsqu, dsqrt, ddiv, drec, scale, - dnormalize + vfmadd, vfnmadd, vfmsub, vfnmsub, Double, dadd, dadd2, dsub, dsub2, dmul, dsqu, dsqrt, ddiv, drec, scale, dnormalize import IfElse: ifelse diff --git a/src/estrin.jl b/src/estrin.jl index 76c83b3..bb3f771 100644 --- a/src/estrin.jl +++ b/src/estrin.jl @@ -1,51 +1,51 @@ -@generated function estrin(x, p::NTuple{N}) where {N} - # N = length(p) - # log2N = VectorizationBase.intlog2(N) - ex = Expr(:block, Expr(:meta, :inline)) - Nfrac1 = N >> 1 - nextp = :p_1_ - nextx = :x_1 - Nfrac1 > 1 && push!(ex.args, :( $nextx = x * x )) - for n ∈ 1:Nfrac1 - push!(ex.args, :( $(Symbol(nextp,n)) = muladd(p[$(2n)],x,p[$(2n-1)]) ) ) - end - oddNfrac = isodd(N) - if oddNfrac - Nfrac1 += 1 - push!(ex.args, :( $(Symbol(nextp,Nfrac1)) = p[$N] )) - end - lastp = nextp - lastx = nextx - depth = 1 - while Nfrac1 > 1 - # while Nfrac1 > 1 - oddNfrac = isodd(Nfrac1) - Nfrac1 >>= 1 - depth += 1 - nextp = Symbol(:p_, depth, :_) - nextx = Symbol(:x_, depth) - (Nfrac1 > 1 || oddNfrac) && push!(ex.args, :( $nextx = $lastx * $lastx )) - for n ∈ 1:Nfrac1 - np = Symbol(nextp,n) - # @show lastp, n - lpu = Symbol(lastp,2n) - lpl = Symbol(lastp,2n-1) - push!(ex.args, :( $np = muladd($lpu,$lastx,$lpl) ) ) - end - if oddNfrac - Nfrac1 += 1 - push!(ex.args, :( $(Symbol(nextp,Nfrac1)) = $(Symbol(lastp,2Nfrac1-1)) )) - end +# @generated function estrin(x, p::NTuple{N}) where {N} +# # N = length(p) +# # log2N = VectorizationBase.intlog2(N) +# ex = Expr(:block, Expr(:meta, :inline)) +# Nfrac1 = N >> 1 +# nextp = :p_1_ +# nextx = :x_1 +# Nfrac1 > 1 && push!(ex.args, :( $nextx = Base.FastMath.mul_fast(x, x))) +# for n ∈ 1:Nfrac1 +# push!(ex.args, :( $(Symbol(nextp,n)) = muladd(p[$(2n)],x,p[$(2n-1)]) ) ) +# end +# oddNfrac = isodd(N) +# if oddNfrac +# Nfrac1 += 1 +# push!(ex.args, :( $(Symbol(nextp,Nfrac1)) = p[$N] )) +# end - lastp = nextp - lastx = nextx - end - ex -end +# lastp = nextp +# lastx = nextx +# depth = 1 +# while Nfrac1 > 1 +# # while Nfrac1 > 1 +# oddNfrac = isodd(Nfrac1) +# Nfrac1 >>= 1 +# depth += 1 +# nextp = Symbol(:p_, depth, :_) +# nextx = Symbol(:x_, depth) +# (Nfrac1 > 1 || oddNfrac) && push!(ex.args, :( $nextx = Base.FastMath.mul_fast($lastx, $lastx))) +# for n ∈ 1:Nfrac1 +# np = Symbol(nextp,n) +# # @show lastp, n +# lpu = Symbol(lastp,2n) +# lpl = Symbol(lastp,2n-1) +# push!(ex.args, :( $np = muladd($lpu,$lastx,$lpl) ) ) +# end +# if oddNfrac +# Nfrac1 += 1 +# push!(ex.args, :( $(Symbol(nextp,Nfrac1)) = $(Symbol(lastp,2Nfrac1-1)) )) +# end + +# lastp = nextp +# lastx = nextx +# end +# ex +# end # Given a `VecUnroll` argument, we'll instruction level parallelism that way and can thus forgo `estrin` -@inline estrin(x::VecUnroll, p::NTuple{N}) where {N} = evalpoly(x, p) # macro estrin(x, p...) # t = Expr(:tuple); foreach(pᵢ -> push!(t.args, pᵢ), p) @@ -56,22 +56,44 @@ end # end # end -macro horner(x, p...) - N = length(p) - ex = Expr(:call, :muladd, p[N], x, p[N-1]) - for n ∈ 2:N-1 - ex = Expr(:call, :muladd, ex, x, p[N-n]) - end - esc(ex) -end +# macro horner(x, p...) +# N = length(p) +# ex = Expr(:call, :muladd, p[N], x, p[N-1]) +# for n ∈ 2:N-1 +# ex = Expr(:call, :muladd, ex, x, p[N-n]) +# end +# esc(ex) +# end -@generated function evalpoly(x, p::Tuple{Vararg{Any,N}}) where {N} - ex = Expr(:call, :muladd, Expr(:ref, :p, N), :x, Expr(:ref, :p, N-1)) - for n ∈ 2:N-1 - ex = Expr(:call, :muladd, ex, :x, Expr(:ref, :p, N - n)) - end - Expr(:block, Expr(:meta, :inline), ex) +@inline evalpoly(x, p::Tuple{T1}) where {T1} = only(p) +@inline evalpoly(x, p::Tuple{T1,T2}) where {T1,T2} = muladd(p[2], x, p[1]) +@inline evalpoly(x, p::Tuple{T1,T2,T3,Vararg{Any,N}}) where {T1,T2,T3,N} = evalpoly(x, (ntuple(n -> p[n], Val(N+1))..., muladd(p[end], x, p[end-1]))) + +@inline estrin(x::VecUnroll, p::NTuple{N}) where {N} = evalpoly(x, p) +@inline estrin(x, p::Tuple{Vararg{Any,N}}) where {N} = estrin(x, p, StaticInt{N}() & StaticInt{3}(), lt(StaticInt{N}(), StaticInt(7))) +@inline estrin(x, p, r, ::True) = evalpoly(x, p) +@inline function estrin(x, p::Tuple{Vararg{Any,N}}, ::StaticInt{0}, ::False) where {N} + x2 = Base.FastMath.mul_fast(x, x) + x4 = Base.FastMath.mul_fast(x2, x2) + res = muladd(x2, muladd(x, p[end], p[end-1]), muladd(x, p[end-2], p[end-3])) + return _estrin(x, x2, x4, res, ntuple(n -> p[n], Val(N-4))) +end +@inline function estrin(x, p::Tuple{Vararg{Any,N}}, ::StaticInt{R}, ::False) where {N,R} + x2 = Base.FastMath.mul_fast(x, x) + x4 = Base.FastMath.mul_fast(x2, x2) + res = evalpoly(x, p[N-R+1:N]) + return _estrin(x, x2, x4, res, ntuple(n -> p[n], Val(N-R))) +end +@inline function __estrin(x, x2, x4, ex, p1, p2, p3, p4) + part = muladd(x2, muladd(x, p4, p3), muladd(x, p2, p1)) + muladd(x4, ex, part) end +@inline _estrin(x, x2, x4, ex, p::Tuple{}) = ex +@inline function _estrin(x, x2, x4, ex, p::Tuple{T1,T2,T3,T4,Vararg{Any,N}}) where {T1,T2,T3,T4,N} + ex = _estrin(x, x2, x4, ex, ntuple(n -> p[n+4], Val(N))) + __estrin(x, x2, x4, ex, p[1], p[2], p[3], p[4]) +end + # OscardSmith # https://github.com/JuliaLang/julia/blob/3253fb5a60ad841965eb6bd218921d55101c0842/base/special/expm1.jl diff --git a/src/log.jl b/src/log.jl index 6047266..b0958d9 100644 --- a/src/log.jl +++ b/src/log.jl @@ -94,23 +94,21 @@ end @inline function log_kernel(x::FloatType64) - c7 = 0.1532076988502701353 - c6 = 0.1525629051003428716 - c5 = 0.1818605932937785996 - c4 = 0.2222214519839380009 - c3 = 0.2857142932794299317 - c2 = 0.3999999999635251990 - c1 = 0.6666666666667333541 - # return @horner x c1 c2 c3 c4 c5 c6 c7 - @horner x c1 c2 c3 c4 c5 c6 c7 + c7 = 0.1532076988502701353 + c6 = 0.1525629051003428716 + c5 = 0.1818605932937785996 + c4 = 0.2222214519839380009 + c3 = 0.2857142932794299317 + c2 = 0.3999999999635251990 + c1 = 0.6666666666667333541 + evalpoly(x, (c1, c2, c3, c4, c5, c6, c7)) end @inline function log_kernel(x::FloatType32) - c3 = 0.3027294874f0 - c2 = 0.3996108174f0 - c1 = 0.6666694880f0 - # return @horner x c1 c2 c3 - @horner x c1 c2 c3 + c3 = 0.3027294874f0 + c2 = 0.3996108174f0 + c1 = 0.6666694880f0 + evalpoly(x, (c1, c2, c3)) end """ @@ -162,8 +160,7 @@ end # c3 = 0.399999999950799600689777 # c2 = 0.6666666666667778740063 # c1 = 2.0 -# # return @horner x c1 c2 c3 c4 c5 c6 c7 c8 -# @horner x c1 c2 c3 c4 c5 c6 c7 c8 +# evalpoly(x, (c1, c2, c3, c4, c5, c6, c7, c8)) # end @inline function log_fast_kernel(::Val{ℯ}, x::FloatType64) c1 = 1.999999999999999972288314133660764626502058106566467649677782135685758742482496766144698785351773083811113458922204637526163881038549205101773551870109094623210762917254877090109553039007279469903453824801461040391573804774824166566643949526292524754553263795026695443159712389662581644078252968607010603610968 @@ -175,8 +172,7 @@ end c7 = 0.1524699883651617684758546885725609890936381489527976937281307452249553913565153843250171402249265467319884679549763243177823555571966975970148692848138108033225746264903112492670940278404127063267504949155893965147469747805989675713477911533708565646356998159446756108541482683919621746752450341943950918560444 c8 = 0.1538953296978747663739188355461749838905796367288249763220945925738162292410415558446630046977678608900931196974342765569666836704427957456516824211567717339130678264230162068823354062924440216282160834915006269368499669546218116339647782657742158043373280480925865755011661730676024105585113850596556788214285 - # return @horner x c1 c2 c3 c4 c5 c6 c7 c8 - @horner x c1 c2 c3 c4 c5 c6 c7 c8 + evalpoly(x, (c1, c2, c3, c4, c5, c6, c7, c8)) end @inline function log_fast_kernel(::Val{2}, x::FloatType64) c1 = 2.885390081777926774740337587963391752686470697574765996133470083444891120861519512296991622220723412089079177191231548172714708142318482235816576480680610952734785448442320447132165628681849517673133529587846690979418974880507488664900789156089097059610996054569326290084125678471502654166548446893122338532023 @@ -187,8 +183,8 @@ end c6 = 0.2623764049675897374399611828075795820528035043402834678195862228934527298347670603110363990323094620293616051801837703306154765947894231616030376001347124009763901282915858969582585714993674594044171879858797430669924758656504158059304709147122269898951359911582928703927632984965613508778815160634440821006823 c7 = 0.2199676960988168325549661966995124554567674873746539020720831896109731813706126058088861721617649492103067104363793480846581954569700692779507905838002757047383852895766988734919935420040379293796738154168364864473763605523658805065039735690708281857109533843972417256743512773874125120066398926457479452994093 c8 = 0.2220240289710959406538132498293188569978091951805264791950528071212183891654240675907318516066736410222500677207442838638375330590597400913020365613995393179596880082388555123835749955456521729694538056168216831175849919088444866170319155410834510245750708062346271728171344176144233493167586972292956621704167 - # return @horner x c1 c2 c3 c4 c5 c6 c7 c8 - @horner x c1 c2 c3 c4 c5 c6 c7 c8 + + evalpoly(x, (c1, c2, c3, c4, c5, c6, c7, c8)) end @inline function log_fast_kernel(::Val{10}, x::FloatType64) @@ -200,7 +196,8 @@ end c6 = 0.07898316804972451278986704059544004657534078944771282690278469570981935772429141983258957479757541438522692317478446375357141436448959705067029890831066506672402437452874301364583400492390678050347427582721177656604136026871846918722378134086776956936760280411925008797490847304302379623068922461071133054013859 c7 = 0.06621687460284276437404220222283889587447460505118960613559060231236406623915686648073020335416144993466970222767859395725920508224420109118449188766790585839731181487203895006109028946170265819774012459305958251419929926567321073232894818683941128265790381145874410375046038091980215817264367754587024710978986 c8 = 0.0668358924784686462819357730165824743917341557202951962573347574755028338084519992412011915655131252999581138520558248266369770803727027591714823269708741953473958951570623295145882252663691038603224938441400683179259967167971897078905444705976854175284181780118155805554122865814034985253134445374766613739187 - @horner x c1 c2 c3 c4 c5 c6 c7 c8 + + evalpoly(x, (c1, c2, c3, c4, c5, c6, c7, c8)) end @inline function log_fast_kernel(::Val{ℯ}, x::FloatType32) @@ -209,8 +206,8 @@ end c3 = 0.400005877017974853515625f0 c2 = 0.666666686534881591796875f0 c1 = 2f0 - # return @horner x c1 c2 c3 c4 c5 - @horner x c1 c2 c3 c4 c5 + + evalpoly(x, (c1, c2, c3, c4, c5)) end @inline function log_fast_kernel(::Val{2}, x::FloatType32) c1 = 2.88539008183608973931111465797605163855459400351399679091670150619994307807932305709230255880455898882523001788391899491983367652635183189227843545539636871821101647200186975131915853092890583610850196033734515715697697545252303451919374844119991942911339078566733293749421229056798365084549286067828816391335f0 @@ -218,7 +215,7 @@ end c3 = 0.577092346895436124496418053994048368787454897109979847565300121654461075967303615264336356613865144091476468885553992850165141154633791757163879758533181288454326796486333639237352641170689759463369795606769823631738673348747856212036970422229498298065338996708007695849071708840167941742938607388858010144124f0 c4 = 0.4112062774884430872292144102146583634128259952701992109373810141836243377352557453727961104893802593899034166502781198307239642169574364520342372597806296006723519823066460247328403371685464127344456586539860933393102297655759579144420040193328399480261756827846184101221968710775674920677966868480884154531906f0 c5 = 0.3483929578463614140008807858571049228792841000218239434906676609692043222435549289484194004801943792995337270459610071866199644093805754623761881367467842177198732930226925274643266617665940920235915835781226498423330347120803047035104419586882902933922994002620011702649787056467119273317636032117141887959942f0 - @horner x c1 c2 c3 c4 c5 + evalpoly(x, (c1, c2, c3, c4, c5)) end @inline function log_fast_kernel(::Val{10}, x::FloatType32) c1 = 0.8685889638240124402397708951106502262202360135356954454698763954975124221040424849066825167876820875325127784493624787542409796165383940126454679616909814259580253262132903371754228716156380963198674619876749182361498397961445142358261199400985591909478187454632084248453424851342102079761821940780603198545616f0 @@ -226,7 +223,7 @@ end c3 = 0.1737221066836498683203094026804327752209318893377756731259494918401339926199681736692287989621095193583635545978137901741612449570980653896864502278390604987366493020260231450208306389176678539877072993190077953368719548863200253103843475249767868540737686876012511040200964468493328048981851042321833775224372f0 c4 = 0.1237854239293478707125637927629425829204203853315914667487437505152964085560979536798787545674711702014151928256831535140556118705532892713919312267005970112932094938487205764203670341705365514082680822781583742792219266327012272008036123492184419119821962482310107701115807828778871264690149628141482055104599f0 c5 = 0.1048767305898517597797548767722330999824230472712341470034472461777359592830413739746852188570789846396677304208657150772765766584594750277172294014192856723183380665073137335352084472870754706334151185602395485903693315152058182538357857178779149581856966210549355606555462782599244992615178411271029676590258f0 - @horner x c1 c2 c3 c4 c5 + evalpoly(x, (c1, c2, c3, c4, c5)) end """ diff --git a/src/misc.jl b/src/misc.jl index f644bb4..1d7fff1 100644 --- a/src/misc.jl +++ b/src/misc.jl @@ -31,22 +31,22 @@ end @inline function cbrt_kernel(x::FloatType64) - c6d = -0.640245898480692909870982 - c5d = 2.96155103020039511818595 - c4d = -5.73353060922947843636166 - c3d = 6.03990368989458747961407 - c2d = -3.85841935510444988821632 - c1d = 2.2307275302496609725722 - @horner x c1d c2d c3d c4d c5d c6d + c6 = -0.640245898480692909870982 + c5 = 2.96155103020039511818595 + c4 = -5.73353060922947843636166 + c3 = 6.03990368989458747961407 + c2 = -3.85841935510444988821632 + c1 = 2.2307275302496609725722 + evalpoly(x, (c1, c2, c3, c4, c5, c6)) end @inline function cbrt_kernel(x::FloatType32) - c6f = -0.601564466953277587890625f0 - c5f = 2.8208892345428466796875f0 - c4f = -5.532182216644287109375f0 - c3f = 5.898262500762939453125f0 - c2f = -3.8095417022705078125f0 - c1f = 2.2241256237030029296875f0 - @horner x c1f c2f c3f c4f c5f c6f + c6 = -0.601564466953277587890625f0 + c5 = 2.8208892345428466796875f0 + c4 = -5.532182216644287109375f0 + c3 = 5.898262500762939453125f0 + c2 = -3.8095417022705078125f0 + c1 = 2.2241256237030029296875f0 + evalpoly(x, (c1, c2, c3, c4, c5, c6)) end """ Algorithm: diff --git a/src/priv.jl b/src/priv.jl index c0eac17..b123299 100644 --- a/src/priv.jl +++ b/src/priv.jl @@ -256,13 +256,13 @@ const under_expk(::Type{Float32}) = -104f0 return estrin(x, (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10)) end -@inline function expk_kernel(x::FloatType32) +@inline function expk_kernel(x::FloatType32) c5 = 0.00136324646882712841033936f0 c4 = 0.00836596917361021041870117f0 c3 = 0.0416710823774337768554688f0 c2 = 0.166665524244308471679688f0 c1 = 0.499999850988388061523438f0 - return @horner x c1 c2 c3 c4 c5 + return evalpoly(x, (c1, c2, c3, c4, c5)) end @inline function expk(d::Double{V}) where {V <: vIEEEFloat} @@ -303,13 +303,13 @@ end return dadd(dmul(x, u), c1) end -@inline function expk2_kernel(x::Double{<:FloatType32}) +@inline function expk2_kernel(x::Double{<:FloatType32}) c5 = 0.1980960224f-3 c4 = 0.1394256484f-2 c3 = 0.8333456703f-2 c2 = 0.4166637361f-1 c1 = 0.166666659414234244790680580464f0 - u = @horner x.hi c2 c3 c4 c5 + u = evalpoly(x.hi, (c2, c3, c4, c5)) return dadd(dmul(x, u), c1) end @@ -343,7 +343,7 @@ end c3 = 0.285714285511134091777308 c2 = 0.400000000000914013309483 c1 = 0.666666666666664853302393 - return @horner x c1 c2 c3 c4 c5 c6 c7 c8 + return evalpoly(x, (c1, c2, c3, c4, c5, c6, c7, c8)) end @inline function logk2_kernel(x::FloatType32) @@ -351,7 +351,7 @@ end c3 = 0.285112679004669189453125f0 c2 = 0.400007992982864379882812f0 c1 = 0.666666686534881591796875f0 - return @horner x c1 c2 c3 c4 + return evalpoly(x, (c1, c2, c3, c4)) end @inline function logk2(d::Double{V}) where {V <: vIEEEFloat} @@ -393,7 +393,7 @@ end c3 = 0.285112679004669189453125f0 c2 = 0.400007992982864379882812f0 c1 = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9) - dadd(dmul(x, @horner x.hi c2 c3 c4), c1) + dadd(dmul(x, evalpoly(x.hi, (c2, c3, c4))), c1) end logkmul(::Type{Float64}) = 1.8446744073709551616e19 diff --git a/src/trig.jl b/src/trig.jl index c4d8153..cd8ea22 100644 --- a/src/trig.jl +++ b/src/trig.jl @@ -31,7 +31,7 @@ end c3 = -0.0001981069071916863322258f0 c2 = 0.00833307858556509017944336f0 c1 = -0.166666597127914428710938f0 - return dadd(c1, (x.hi * (@horner x.hi c2 c3 c4))) + return dadd(c1, (x.hi * (evalpoly(x.hi, (c2, c3, c4))))) end @inline function sin(d::V) where V <: FloatType64 @@ -189,7 +189,7 @@ end c3 = -0.0001981069071916863322258f0 c2 = 0.00833307858556509017944336f0 c1 = -0.166666597127914428710938f0 - return @horner x c1 c2 c3 c4 + return evalpoly(x, (c1, c2, c3, c4)) end @inline function sin_fast(d::FloatType64) @@ -329,14 +329,14 @@ function sincos_fast end a3 = -0.000198412698278911770864914 a2 = 0.0083333333333191845961746 a1 = -0.166666666666666130709393 - return @horner x a1 a2 a3 a4 a5 a6 + return evalpoly(x, (a1, a2, a3, a4, a5, a6)) end @inline function sincos_a_kernel(x::FloatType32) a3 = -0.000195169282960705459117889f0 a2 = 0.00833215750753879547119141f0 a1 = -0.166666537523269653320312f0 - return @horner x a1 a2 a3 + return evalpoly(x, (a1, a2, a3)) end @inline function sincos_b_kernel(x::FloatType64) @@ -347,7 +347,7 @@ end b3 = -0.00138888888888714019282329 b2 = 0.0416666666666665519592062 b1 = -0.50 - return @horner x b1 b2 b3 b4 b5 b6 b7 + return evalpoly(x, (b1, b2, b3, b4, b5, b6, b7)) end @inline function sincos_b_kernel(x::FloatType32) @@ -356,7 +356,7 @@ end b3 = -0.00138888787478208541870117f0 b2 = 0.0416666641831398010253906f0 b1 = -0.5f0 - return @horner x b1 b2 b3 b4 b5 + return evalpoly(x, (b1, b2, b3, b4, b5)) end @inline function sincos_fast(d::FloatType64) @@ -596,7 +596,7 @@ end c3 = 0.0540687143802642822265625f0 c2 = 0.133325666189193725585938f0 c1 = 0.33333361148834228515625f0 - return @horner x c1 c2 c3 c4 c5 c6 c7 + return evalpoly(x, (c1, c2, c3, c4, c5, c6, c7)) end @inline function tan_fast(d::FloatType64) @@ -688,7 +688,7 @@ end c3 = 0.0540687143802642822265625f0 c2 = 0.133325666189193725585938f0 c1 = 0.33333361148834228515625f0 - return dadd(c1, x.hi * (@horner x.hi c2 c3 c4 c5 c6 c7)) + return dadd(c1, x.hi * evalpoly(x.hi, (c2, c3, c4, c5, c6, c7))) end @inline function tan(d::V) where V <: FloatType64