Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SpMSpVMasked #96

Merged
merged 6 commits into from
Jun 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 14 additions & 23 deletions src/GraphBLAS-sharp.Backend/Algorithms/BFS.fs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ module internal BFS =
Operations.SpMVInPlace add mul clContext workGroupSize

let spMSpV =
Operations.SpMSpVBool add mul clContext workGroupSize
Operations.SpMSpVMaskedBool add mul clContext workGroupSize

let zeroCreate =
Vector.zeroCreate clContext workGroupSize
Expand All @@ -145,13 +145,11 @@ module internal BFS =
let maskComplementedInPlace =
Vector.map2InPlace Mask.complementedOp clContext workGroupSize

let maskComplemented =
Vector.map2Sparse Mask.complementedOp clContext workGroupSize

let fillSubVectorInPlace =
Vector.assignByMaskInPlace (Mask.assign) clContext workGroupSize

let toSparse = Vector.toSparse clContext workGroupSize
let toSparse =
Vector.toSparseUnsorted clContext workGroupSize

let toDense = Vector.toDense clContext workGroupSize

Expand Down Expand Up @@ -190,28 +188,21 @@ module internal BFS =
match frontier with
| ClVector.Sparse _ ->
//Getting new frontier
match spMSpV queue matrix frontier with
match spMSpV queue matrix frontier levels with
| None ->
frontier.Dispose()
stop <- true
| Some newFrontier ->
| Some newMaskedFrontier ->
frontier.Dispose()
//Filtering visited vertices
match maskComplemented queue DeviceOnly newFrontier levels with
| None ->
stop <- true
newFrontier.Dispose()
| Some newMaskedFrontier ->
newFrontier.Dispose()

//Push/pull
let NNZ = getNNZ queue newMaskedFrontier

if (push NNZ newMaskedFrontier.Size) then
frontier <- newMaskedFrontier
else
frontier <- toDense queue DeviceOnly newMaskedFrontier
newMaskedFrontier.Dispose()

//Push/pull
let NNZ = getNNZ queue newMaskedFrontier

if (push NNZ newMaskedFrontier.Size) then
frontier <- newMaskedFrontier
else
frontier <- toDense queue DeviceOnly newMaskedFrontier
newMaskedFrontier.Dispose()
| ClVector.Dense oldFrontier ->
//Getting new frontier
spMVInPlace queue matrix frontier frontier
Expand Down
38 changes: 28 additions & 10 deletions src/GraphBLAS-sharp.Backend/Common/ClArray.fs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ module ClArray =

let gid = ndRange.GlobalID0

if gid < length then
if gid < length && not result.Value then
let isExist = (%predicate) vector.[gid]

if isExist then result.Value <- true @>
Expand Down Expand Up @@ -902,22 +902,40 @@ module ClArray =

let count<'a> (predicate: Expr<'a -> bool>) (clContext: ClContext) workGroupSize =

let sum =
Reduce.reduce <@ (+) @> clContext workGroupSize
let count =
<@ fun (ndRange: Range1D) (length: int) (array: ClArray<'a>) (count: ClCell<int>) ->
let gid = ndRange.GlobalID0
let mutable countLocal = 0
let step = ndRange.GlobalWorkSize

let mutable i = gid

let getBitmap =
Map.map<'a, int> (Map.predicateBitmap predicate) clContext workGroupSize
while i < length do
let res = (%predicate) array.[i]
if res then countLocal <- countLocal + 1
i <- i + step

atomic (+) count.Value countLocal |> ignore @>

let count = clContext.Compile count

fun (processor: RawCommandQueue) (array: ClArray<'a>) ->

let bitmap = getBitmap processor DeviceOnly array
let result = clContext.CreateClCell<int>(0)

let result =
(sum processor bitmap).ToHostAndFree processor
let numberOfGroups =
Utils.divUpClamp array.Length workGroupSize 1 1024

bitmap.Free()
let ndRange =
Range1D.CreateValid(workGroupSize * numberOfGroups, workGroupSize)

result
let kernel = count.GetKernel()

kernel.KernelFunc ndRange array.Length array result

processor.RunKernel kernel

result.ToHostAndFree processor

/// <summary>
/// Builds a new array whose elements are the results of applying the given function
Expand Down
4 changes: 3 additions & 1 deletion src/GraphBLAS-sharp.Backend/Common/Sort/Bitonic.fs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module Bitonic =
int (clContext.ClDevice.LocalMemSize)
/ (sizeof<uint64> + sizeof<'a>)
)
/ 2

let maxThreadsPerBlock =
min (clContext.ClDevice.MaxWorkGroupSize) (localSize / 2)
Expand Down Expand Up @@ -257,6 +258,7 @@ module Bitonic =
int (clContext.ClDevice.LocalMemSize)
/ (sizeof<int> + sizeof<'a>)
)
/ 2

let maxThreadsPerBlock =
min (clContext.ClDevice.MaxWorkGroupSize) (localSize / 2)
Expand Down Expand Up @@ -476,4 +478,4 @@ module Bitonic =

kernelGlobal.KernelFunc ndRangeGlobal rows values values.Length (localSize * 2)

queue.RunKernel(kernelGlobal)
queue.RunKernel(kernelGlobal)
4 changes: 4 additions & 0 deletions src/GraphBLAS-sharp.Backend/Common/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ module internal Utils =
>> fun x -> x ||| (x >>> 16)
>> fun x -> x + 1

/// Integer division of x by y that adds one to the quotient whenever the remainder is non-zero.
let divUp x y =
    let quotient = x / y

    match x % y with
    | 0 -> quotient
    | _ -> quotient + 1

/// divUp x y, raised to at least `left` and then capped at `right`.
let divUpClamp x y left right =
    let atLeastLeft = max (divUp x y) left
    min atLeastLeft right

let floorToMultiple multiple x = x / multiple * multiple

let ceilToMultiple multiple x = ((x - 1) / multiple + 1) * multiple
Expand Down
46 changes: 45 additions & 1 deletion src/GraphBLAS-sharp.Backend/Operations/Operations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@
| _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication. Optimized for bool OR and AND operations.
/// CSR Matrix - sparse vector multiplication. Optimized for bool OR and AND operations by skipping reduction stage.
/// </summary>
/// <param name="add">Type of binary function to reduce entries.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
Expand All @@ -352,6 +352,50 @@
| ClMatrix.CSR m, ClVector.Sparse v -> Option.map ClVector.Sparse (run queue m v)
| _ -> failwith "Not implemented yet"

/// <summary>
/// Masked product of a CSR matrix and a sparse vector. The mask is applied complemented.
/// </summary>
/// <remarks>
/// Only the combination CSR matrix / sparse vector / dense mask is handled;
/// every other combination fails with "Not implemented yet".
/// </remarks>
/// <param name="add">Binary function used to reduce entries.</param>
/// <param name="mul">Binary function used to combine entries.</param>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
let SpMSpVMasked
    (add: Expr<'c option -> 'c option -> 'c option>)
    (mul: Expr<'a option -> 'b option -> 'c option>)
    (clContext: ClContext)
    workGroupSize
    =

    let multiplyMasked =
        SpMSpV.Masked.runMasked add mul clContext workGroupSize

    fun (queue: RawCommandQueue) (matrix: ClMatrix<'a>) (vector: ClVector<'b>) (mask: ClVector<'d>) ->
        match matrix, vector, mask with
        | ClMatrix.CSR csrMatrix, ClVector.Sparse sparseVector, ClVector.Dense denseMask ->
            // The kernel result is optional; a present result is wrapped back into the sparse vector case.
            multiplyMasked queue csrMatrix sparseVector denseMask
            |> Option.map ClVector.Sparse
        | _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication with mask. Mask is complemented.
/// Optimized for bool OR and AND operations by skipping reduction stage.
/// </summary>
/// <remarks>
/// Only the combination CSR matrix / sparse vector / dense mask is handled;
/// every other combination fails with "Not implemented yet".
/// The result is optional and is passed through from the underlying kernel
/// (presumably None when the product has no stored entries; confirm against SpMSpV.Masked).
/// </remarks>
/// <param name="add">Type of binary function to reduce entries.</param>
/// <param name="mul">Type of binary function to combine entries.</param>
/// <param name="clContext">OpenCL context.</param>
/// <param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
let SpMSpVMaskedBool
    (add: Expr<bool option -> bool option -> bool option>)
    (mul: Expr<bool option -> bool option -> bool option>)
    (clContext: ClContext)
    workGroupSize
    =

    let run =
        SpMSpV.Masked.runMaskedBoolStandard add mul clContext workGroupSize

    // The vector element type is written as bool rather than a type variable: the compiler
    // already infers bool here, and annotating it with 'b only produced a
    // "less generic than indicated by the type annotations" warning on every build.
    // The inferred signature is unchanged.
    fun (queue: RawCommandQueue) (matrix: ClMatrix<'a>) (vector: ClVector<bool>) (mask: ClVector<'d>) ->
        match matrix, vector, mask with
        | ClMatrix.CSR m, ClVector.Sparse v, ClVector.Dense mask -> Option.map ClVector.Sparse (run queue m v mask)
        | _ -> failwith "Not implemented yet"

/// <summary>
/// CSR Matrix - sparse vector multiplication.
/// </summary>
Expand Down
Loading
Loading