Skip to content

Commit

Permalink
Fix for 32 bit part 2
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Nov 1, 2021
1 parent e5d987c commit cd28f54
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions src/TriangularSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ end
# @inline function solve_Wx3W!(ap::AbstractStridedPointer{T}, bp::AbstractStridedPointer{T}, U, rowoffset, coloffset, m::VectorizationBase.AbstractMask) where {T}
# WS = VectorizationBase.pick_vector_width(T)
# W = Int(WS)
# A11 = vload(bp, Unroll{2,1,W,1,W,0xffffffffffffffff,1}((rowoffset,coloffset)), m)
# A12 = vload(bp, Unroll{2,1,W,1,W,0xffffffffffffffff,1}((rowoffset,coloffset+WS)), m)
# A13 = vload(bp, Unroll{2,1,W,1,W,0xffffffffffffffff,1}((rowoffset,coloffset+WS+WS)), m)
# A11 = vload(bp, Unroll{2,1,W,1,W,(-1%UInt),1}((rowoffset,coloffset)), m)
# A12 = vload(bp, Unroll{2,1,W,1,W,(-1%UInt),1}((rowoffset,coloffset+WS)), m)
# A13 = vload(bp, Unroll{2,1,W,1,W,(-1%UInt),1}((rowoffset,coloffset+WS+WS)), m)

# A11, A12, A13 = solve_Wx3W(A11, A12, A13, U, WS)

# vstore!(ap, A11, Unroll{2,1,W,1,W,0xffffffffffffffff,1}((rowoffset,coloffset)), m)
# vstore!(ap, A12, Unroll{2,1,W,1,W,0xffffffffffffffff,1}((rowoffset,coloffset+WS)), m)
# vstore!(ap, A13, Unroll{2,1,W,1,W,0xffffffffffffffff,1}((rowoffset,coloffset+WS+WS)), m)
# vstore!(ap, A11, Unroll{2,1,W,1,W,(-1%UInt),1}((rowoffset,coloffset)), m)
# vstore!(ap, A12, Unroll{2,1,W,1,W,(-1%UInt),1}((rowoffset,coloffset+WS)), m)
# vstore!(ap, A13, Unroll{2,1,W,1,W,(-1%UInt),1}((rowoffset,coloffset+WS+WS)), m)
# end

# solve_3Wx3W!(A,B,U::UpperTriangular) = solve_3Wx3W!(A,B,parent(U))
Expand Down Expand Up @@ -226,15 +226,15 @@ end
quote
$(Expr(:meta,:inline))
# here, we just want to load the vectors
C11 = VectorizationBase.data(vload(spa, Unroll{2,1,$W,1,$W,0xffffffffffffffff,1}((StaticInt(0),n)), mask))
C11 = VectorizationBase.data(vload(spa, Unroll{2,1,$W,1,$W,(-1%UInt),1}((StaticInt(0),n)), mask))
Base.Cartesian.@nexprs $W c -> C11_c = C11[c]
for nk ∈ SafeCloseOpen(n) # nmuladd
A11 = vload(spc, (MM{$W}(StaticInt(0)),nk), mask)
Base.Cartesian.@nexprs $W c -> C11_c = vfnmadd_fast(A11, vload(spu, (nk,n+(c-1))), C11_c)
end
C11 = VecUnroll((Base.Cartesian.@ntuple $W C11))
C11 = solve_AU(C11, spu, n, Val{$UNIT}())
i = Unroll{2,1,$W,1,$W,0xffffffffffffffff,1}((StaticInt(0),n))
i = Unroll{2,1,$W,1,$W,(-1%UInt),1}((StaticInt(0),n))
$storecexpr
maybestore!(spb, C11, i, mask)
end
Expand Down

2 comments on commit cd28f54

@chriselrod
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/47864

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.8 -m "<description of version>" cd28f544ca2605692ff8d67ce9ba40c688280064
git push origin v0.1.8

Please sign in to comment.