diff --git a/Project.toml b/Project.toml index 343154a..71ce59a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TriangularSolve" uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf" authors = ["chriselrod and contributors"] -version = "0.1.4" +version = "0.1.5" [deps] CloseOpenIntervals = "fb6a15b2-703c-40df-9091-08a04967cfa9" @@ -18,13 +18,14 @@ CloseOpenIntervals = "0.1" IfElse = "0.1" LayoutPointers = "0.1.2" LoopVectorization = "0.12.30" -Polyester = "0.3, 0.4" +Polyester = "0.4" Static = "0.2, 0.3" -VectorizationBase = "0.20, 0.21" +VectorizationBase = "0.21" julia = "1.5" [extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Aqua", "Test"] diff --git a/src/TriangularSolve.jl b/src/TriangularSolve.jl index c31532d..7dc5d80 100644 --- a/src/TriangularSolve.jl +++ b/src/TriangularSolve.jl @@ -139,14 +139,14 @@ end end @inline store_small_kern!(spa, ::Nothing, v, spu, i, n, ::Val{false}) = vstore!(spa, v / vload(spu, (n,n)), i) -function BdivU_small_kern!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, mask, ::Val{UNIT}) where {T,UNIT} - W = VectorizationBase.pick_vector_width(T) +function BdivU_small_kern!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, mask::AbstractMask{W}, ::Val{UNIT}) where {T,UNIT,W} + # W = VectorizationBase.pick_vector_width(T) for n ∈ CloseOpen(N) - Amn = vload(spb, (MM(W, StaticInt(0)),n), mask) + Amn = vload(spb, (MM{W}(StaticInt(0)),n), mask) for k ∈ SafeCloseOpen(n) - Amn = vfnmadd_fast(vload(spa, (MM(W, StaticInt(0)),k), mask), vload(spu, (k,n)), Amn) + Amn = vfnmadd_fast(vload(spa, (MM{W}(StaticInt(0)),k), mask), vload(spu, (k,n)), Amn) end - store_small_kern!(spa, sp, Amn, spu, (MM(W, StaticInt(0)),n), n, mask, Val{UNIT}()) + store_small_kern!(spa, sp, Amn, spu, (MM{W}(StaticInt(0)),n), n, mask, Val{UNIT}()) end end function BdivU_small_kern_u!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, ::StaticInt{U}, ::Val{UNIT}) where {T,U,UNIT} @@ -240,7 +240,7 @@ end end end -function rdiv_U!(spc::AbstractStridedPointer{T}, spa, spu, M, N, ::StaticInt{1}, ::Val{UNIT}) where {T,UNIT} +function rdiv_U!(spc::AbstractStridedPointer{T}, spa::AbstractStridedPointer, spu::AbstractStridedPointer, M, N, ::StaticInt{1}, ::Val{UNIT}) where {T,UNIT} WS = pick_vector_width(T) W = Int(WS) UF = unroll_factor(WS) @@ -291,8 +291,12 @@ const LDIVBUFFERS = Vector{UInt8}[] si = StrideIndex{2,(1,2),1}((VectorizationBase.static_sizeof(T), RSUF), (StaticInt(0),StaticInt(0))) stridedpointer(ptr, si, StaticInt{0}()) end +_canonicalize(x) = signed(x) +_canonicalize(::StaticInt{N}) where {N} = StaticInt{N}() function div_dispatch!(C::AbstractMatrix{T}, A, U, ::Val{UNIT}, ::Val{THREAD}) where {UNIT,T,THREAD} - M, N = size(A) + _M, _N = size(A) + M = _canonicalize(_M) + N = _canonicalize(_N) ((N == 0) | (M == 0)) && return nothing _spa, spap = stridedpointer_preserve(A) _spc, spcp = stridedpointer_preserve(C) @@ -475,7 +479,7 @@ function unroll_factor(::StaticInt{W}) where {W} ifelse(Static.lt(num_blocks, StaticInt{1}()), StaticInt{1}(), num_blocks) end -function rdiv_U!(spc::AbstractStridedPointer{T}, spa, spu, M, N, ::StaticInt, ::Val{UNIT}) where {T,UNIT} +function rdiv_U!(spc::AbstractStridedPointer{T}, spa::AbstractStridedPointer, spu::AbstractStridedPointer, M, N, ::StaticInt{var"#UNUSED#"}, ::Val{UNIT}) where {T,UNIT,var"#UNUSED#"} WS = pick_vector_width(T) W = Int(WS) UF = unroll_factor(WS) diff --git a/test/runtests.jl b/test/runtests.jl index 5027faf..ce37230 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -28,3 +28,7 @@ end test_solve(Float32) end end + +using Aqua +Aqua.test_all(TriangularSolve) +