From 99dcf880b71dbec17c211d7312a74400ba433ad3 Mon Sep 17 00:00:00 2001 From: Tim Davis Date: Sat, 7 Dec 2024 06:59:16 -0500 Subject: [PATCH] save log of CUDA failures in AllKTruss --- experimental/test/test_AllKtruss.c | 21 +- save_errors/o1 | 29910 +++++++++++++++++++++++++++ save_errors/o2 | 9392 +++++++++ save_errors/o3 | 29910 +++++++++++++++++++++++++++ save_errors/o4 | 29910 +++++++++++++++++++++++++++ save_errors/o5 | 1781 ++ 6 files changed, 100915 insertions(+), 9 deletions(-) create mode 100644 save_errors/o1 create mode 100644 save_errors/o2 create mode 100644 save_errors/o3 create mode 100644 save_errors/o4 create mode 100644 save_errors/o5 diff --git a/experimental/test/test_AllKtruss.c b/experimental/test/test_AllKtruss.c index 3f59f77474..41ac3b8c36 100644 --- a/experimental/test/test_AllKtruss.c +++ b/experimental/test/test_AllKtruss.c @@ -38,15 +38,16 @@ matrix_info ; const matrix_info files [ ] = { - { 11, "A.mtx" }, - { 2016, "jagmesh7.mtx" }, +// FIXME +// { 11, "A.mtx" }, +// { 2016, "jagmesh7.mtx" }, { 342300, "bcsstk13.mtx" }, - { 45, "karate.mtx" }, - { 6, "ldbc-cdlp-undirected-example.mtx" }, - { 4, "ldbc-undirected-example-bool.mtx" }, - { 4, "ldbc-undirected-example-unweighted.mtx" }, - { 4, "ldbc-undirected-example.mtx" }, - { 5, "ldbc-wcc-example.mtx" }, +// { 45, "karate.mtx" }, +// { 6, "ldbc-cdlp-undirected-example.mtx" }, +// { 4, "ldbc-undirected-example-bool.mtx" }, +// { 4, "ldbc-undirected-example-unweighted.mtx" }, +// { 4, "ldbc-undirected-example.mtx" }, +// { 5, "ldbc-wcc-example.mtx" }, { 0, "" }, } ; @@ -54,6 +55,7 @@ const matrix_info files [ ] = void test_AllKTruss (void) { LAGraph_Init (msg) ; + OK (GrB_Global_set_INT32 (GrB_GLOBAL, true, (GrB_Field) GxB_BURBLE)) ; for (int id = 0 ; ; id++) { @@ -257,6 +259,7 @@ void test_allktruss_errors (void) TEST_LIST = { {"allktruss", test_AllKTruss}, - {"allktruss_errors", test_allktruss_errors}, +// FIXME +// {"allktruss_errors", test_allktruss_errors}, {NULL, NULL} }; diff --git a/save_errors/o1 b/save_errors/o1 new file mode 100644 index 0000000000..caa8c9e7ae --- /dev/null +++ b/save_errors/o1 @@ -0,0 +1,29910 @@ +Test allktruss... GB_cuda_get_device_count: 4, cudaError_t: 0 + +Device: 0: memory: 17071800320 SMs: 56 compute: 6.0 +GB_cuda_init: 0 + +================================== bcsstk13.mtx: + [ GrB_Matrix_build_FP64 (cast J 1 0) (step1: 0.00223027 sec) (step2: 0.0206156 sec) (build, 1 threads) (step3: 0.00084088 sec) (step4: 0.000638023 sec) (jit: cpu load) (step5: 0.00098481 sec) (build 32/32 time: 0.0253525) (hyper to sparse) (wrapup 64/64 time: 0.00322443) (convert ints 32/32 to 64/64, time: 0.000658564) + 0.0293 sec ] + [ GxB_Vector_diag (jit: cuda load) (sparse to hyper) (sparse to full) + 0.00595 sec ] + [ GrB_Vector_nvals + 1.07e-06 sec ] +graph has 2003 self edges + [ GrB_select (jit: cuda load) + 0.007 sec ] +now has 0 self edges + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc01fba00 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc01ff900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (jit: cuda load) (hyper to sparse) (jit: cuda load) (hyper to sparse) + 0.0202 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024f800 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00973 sec ] + [ GrB_select (hyper to sparse) + 0.0092 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000343 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc024b900 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc014bf00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00972 sec ] + [ GrB_select (hyper to sparse) + 0.00877 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024b900 shallow: 0 size: 16032 + ->i: 0x7effc0008200 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0148000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (hyper to sparse) + 0.00858 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.07e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00956 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0393000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00975 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0183 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00855 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0057600 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00979 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00932 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (hyper to sparse) + 0.0093 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00961 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00987 sec ] + [ GrB_select (hyper to sparse) + 0.00935 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00928 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000103 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00971 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc08c2f00 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038f000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00842 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00916 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00956 sec ] + [ GrB_select (hyper to sparse) + 0.00845 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00919 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00841 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00916 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00841 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00916 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000125 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00836 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00943 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0173 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038e800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0094 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038db00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072e800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00939 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0174 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.00875 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0390f00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00824 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00825 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00894 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000128 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.0081 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00915 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0185 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0383700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0a8b200 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0382d00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00909 sec ] + [ GrB_select (hyper to sparse) + 0.00836 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00792 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00932 sec ] + [ GrB_select (hyper to sparse) + 0.0086 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00938 sec ] + [ GrB_select (hyper to sparse) + 0.00783 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00012 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00936 sec ] + [ GrB_select (hyper to sparse) + 0.00861 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00894 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081c800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0b61900 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00898 sec ] + [ GrB_select (hyper to sparse) + 0.00779 sec ] + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000218 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00894 sec ] + [ GrB_select (hyper to sparse) + 0.00776 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0089 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0168 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037cc00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00884 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037fa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00876 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0153 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00882 sec ] + [ GrB_select (hyper to sparse) + 0.0083 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00877 sec ] + [ GrB_select (hyper to sparse) + 0.00753 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000131 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00819 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (hyper to sparse) + 0.00736 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00851 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00879 sec ] + [ GrB_select (hyper to sparse) + 0.00725 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00805 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0089 sec ] + [ GrB_select (hyper to sparse) + 0.00725 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00863 sec ] + [ GrB_select (hyper to sparse) + 0.00903 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081aa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (hyper to sparse) + 0.00901 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00891 sec ] + [ GrB_select (hyper to sparse) + 0.00904 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00901 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00887 sec ] + [ GrB_select (hyper to sparse) + 0.00903 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00902 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00902 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000102 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00833 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0139 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00809 sec ] + [ GrB_select (hyper to sparse) + 0.00734 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00806 sec ] + [ GrB_select (hyper to sparse) + 0.00659 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00816 sec ] + [ GrB_select (hyper to sparse) + 0.00732 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.93e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00661 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00808 sec ] + [ GrB_select (hyper to sparse) + 0.00737 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00659 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.008 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00801 sec ] + [ GrB_select (hyper to sparse) + 0.00656 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00799 sec ] + [ GrB_select (hyper to sparse) + 0.0073 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00799 sec ] + [ GrB_select (hyper to sparse) + 0.00651 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000135 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00785 sec ] + [ GrB_select (hyper to sparse) + 0.00635 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00707 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0398000 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00749 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0079 sec ] + [ GrB_select (hyper to sparse) + 0.00627 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00785 sec ] + [ GrB_select (hyper to sparse) + 0.00708 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00787 sec ] + [ GrB_select (hyper to sparse) + 0.00631 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.8e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00787 sec ] + [ GrB_select (hyper to sparse) + 0.007 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00772 sec ] + [ GrB_select (hyper to sparse) + 0.00618 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00771 sec ] + [ GrB_select (hyper to sparse) + 0.00687 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0736900 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00772 sec ] + [ GrB_select (hyper to sparse) + 0.00651 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00776 sec ] + [ GrB_select (hyper to sparse) + 0.00685 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.91e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00761 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.00748 sec ] + [ GrB_select (hyper to sparse) + 0.00668 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0075 sec ] + [ GrB_select (hyper to sparse) + 0.00588 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00742 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00739 sec ] + [ GrB_select (hyper to sparse) + 0.00581 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00737 sec ] + [ GrB_select (hyper to sparse) + 0.00659 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00734 sec ] + [ GrB_select (hyper to sparse) + 0.00583 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000122 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00743 sec ] + [ GrB_select (hyper to sparse) + 0.0065 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00731 sec ] + [ GrB_select (hyper to sparse) + 0.00564 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00728 sec ] + [ GrB_select (hyper to sparse) + 0.00636 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00726 sec ] + [ GrB_select (hyper to sparse) + 0.00564 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00725 sec ] + [ GrB_select (hyper to sparse) + 0.00639 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00722 sec ] + [ GrB_select (hyper to sparse) + 0.00562 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6d00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00734 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.00713 sec ] + [ GrB_select (hyper to sparse) + 0.00584 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.00703 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00535 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00691 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00533 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00688 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000121 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00692 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00012 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017e300 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00692 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00592 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000122 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00535 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00683 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00677 sec ] + [ GrB_select (hyper to sparse) + 0.0052 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00667 sec ] + [ GrB_select (hyper to sparse) + 0.00587 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00669 sec ] + [ GrB_select (hyper to sparse) + 0.00517 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00667 sec ] + [ GrB_select (hyper to sparse) + 0.00589 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00662 sec ] + [ GrB_select (hyper to sparse) + 0.00515 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00665 sec ] + [ GrB_select (hyper to sparse) + 0.0059 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7200 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00673 sec ] + [ GrB_select (hyper to sparse) + 0.00503 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00653 sec ] + [ GrB_select (hyper to sparse) + 0.0049 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00629 sec ] + [ GrB_select (hyper to sparse) + 0.00494 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0062 sec ] + [ GrB_select (hyper to sparse) + 0.00453 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0700a00 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00622 sec ] + [ GrB_select (hyper to sparse) + 0.00442 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06ffe00 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0062 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00617 sec ] + [ GrB_select (hyper to sparse) + 0.00513 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00618 sec ] + [ GrB_select (hyper to sparse) + 0.00437 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.36e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00616 sec ] + [ GrB_select (hyper to sparse) + 0.00398 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0358500 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00584 sec ] + [ GrB_select (hyper to sparse) + 0.00378 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0046 sec ] + [ GrB_select (hyper to sparse) + 0.00407 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc035a700 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00426 sec ] + [ GrB_select (hyper to sparse) + 0.00438 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.47e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0359e00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00416 sec ] + [ GrB_select (hyper to sparse) + 0.0036 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.56e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7500 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00414 sec ] + [ GrB_select (hyper to sparse) + 0.00308 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00346 sec ] + [ GrB_select (hyper to sparse) + 0.00264 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0decb00 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00318 sec ] + [ GrB_select (hyper to sparse) + 0.00291 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00316 sec ] + [ GrB_select (hyper to sparse) + 0.00343 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000104 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7600 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00318 sec ] + [ GrB_select (hyper to sparse) + 0.00262 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0191e00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00303 sec ] + [ GrB_select (hyper to sparse) + 0.00319 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dcf00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0031 sec ] + [ GrB_select (hyper to sparse) + 0.00282 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0034 sec ] + [ GrB_select (hyper to sparse) + 0.00315 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.28e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7700 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00296 sec ] + [ GrB_select + 0.00144 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014f900 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc06ffb00 shallow: 0 size: 1624 + ->p: 0x7effc06ff400 shallow: 0 size: 1632 + ->i: 0x7effc0dec900 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06d9000 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.00189 sec ] + [ GrB_select (jit: cuda load) + 0.0026 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc0dfb500 shallow: 0 size: 336 + ->p: 0x7effc0dfb300 shallow: 0 size: 344 + ->i: 0x7effc00a7400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc00a7c00 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.00145 sec ] + [ GrB_select C is empty, iso 0 + + 0.00049 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 (jit: cpu load) + 0.000276 sec ] + [ GrB_select C is empty, iso 0 + + 0.000412 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.88e-06 sec ] +all k-truss: kmax 29 + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00985 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0174 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0099 sec ] + [ GrB_select (hyper to sparse) + 0.00865 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) (jit: cpu load) + 0.00204 sec ] + [ GrB_Matrix_nvals + 5.29e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000285 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000262 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00995 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.018 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (hyper to sparse) + 0.0086 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00184 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000134 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00991 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0186 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ac900 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00996 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.018 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155bb00 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155bb00 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00186 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28935e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52770 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81171 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 990324 shallow: 0 total: 990324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ac500 shallow: 0 size: 324684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81170 entries, memory: 967.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00997 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0185 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52622 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81023 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 988548 shallow: 0 total: 988548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155b400 shallow: 0 size: 324092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81022 entries, memory: 965.4 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00961 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52582 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80983 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 988068 shallow: 0 total: 988068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15aa300 shallow: 0 size: 323932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80982 entries, memory: 964.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (hyper to sparse) + 0.00927 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15aa100 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00968 sec ] + [ GrB_select (hyper to sparse) + 0.00924 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00999 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.00926 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00182 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000163 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.22419e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52082 +bucket 2: 28280 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80363 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 980628 shallow: 0 total: 980628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 642904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 321452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80362 entries, memory: 957.6 KB + pending tuples: 0 max pending: 0 zombies: 38 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (wait:A 38 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.18132e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51562 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79827 + vlen: 2003 nvec_nonempty: 1928 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 974196 shallow: 0 total: 974196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 638616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1557800 shallow: 0 size: 319308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79826 entries, memory: 951.4 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0182 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.1573e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51276 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79525 + vlen: 2003 nvec_nonempty: 1913 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 970572 shallow: 0 total: 970572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 636200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a3a00 shallow: 0 size: 318100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79524 entries, memory: 947.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00975 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14461e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51122 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79365 + vlen: 2003 nvec_nonempty: 1903 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 968652 shallow: 0 total: 968652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a2f00 shallow: 0 size: 317460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79364 entries, memory: 945.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00906 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a2300 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00979 sec ] + [ GrB_select (hyper to sparse) + 0.00834 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00827 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1800 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00981 sec ] + [ GrB_select (hyper to sparse) + 0.00829 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1700 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00955 sec ] + [ GrB_select (hyper to sparse) + 0.00825 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1300 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00985 sec ] + [ GrB_select (hyper to sparse) + 0.0083 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1000 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00825 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 4.47e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00187 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000145 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12704e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50958 +bucket 2: 28184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79143 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 965988 shallow: 0 total: 965988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a6600 shallow: 0 size: 316572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79142 entries, memory: 943.3 KB + pending tuples: 0 max pending: 0 zombies: 242 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 25 entries [29:53] + ... + Pending (nil) + + 0.00986 sec ] + [ GrB_select (wait:A 242 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.00653e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49476 +bucket 2: 28126 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77603 + vlen: 2003 nvec_nonempty: 1857 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 947508 shallow: 0 total: 947508 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 620824 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1550600 shallow: 0 size: 310412 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77602 entries, memory: 925.3 KB + pending tuples: 0 max pending: 0 zombies: 28 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00936 sec ] + [ GrB_select (wait:A 28 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.95361e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48884 +bucket 2: 28032 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76917 + vlen: 2003 nvec_nonempty: 1832 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 939276 shallow: 0 total: 939276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 615336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1597900 shallow: 0 size: 307668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76916 entries, memory: 917.3 KB + pending tuples: 0 max pending: 0 zombies: 24 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (wait:A 24 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93781e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48728 +bucket 2: 27982 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76711 + vlen: 2003 nvec_nonempty: 1807 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 936804 shallow: 0 total: 936804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc154ab00 shallow: 0 size: 306844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76710 entries, memory: 914.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (hyper to sparse) + 0.00814 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93122e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48664 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76625 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 935772 shallow: 0 total: 935772 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613000 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14b4500 shallow: 0 size: 306500 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76624 entries, memory: 913.8 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (hyper to sparse) + 0.00889 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594b00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00814 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594b00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (hyper to sparse) + 0.00887 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00183 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000169 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.98471e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49590 +bucket 2: 27730 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77321 + vlen: 2003 nvec_nonempty: 1940 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 944124 shallow: 0 total: 944124 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 618568 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1100 shallow: 0 size: 309284 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77320 entries, memory: 922.0 KB + pending tuples: 0 max pending: 0 zombies: 328 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (wait:A 328 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0168 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.74425e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46632 +bucket 2: 27508 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74141 + vlen: 2003 nvec_nonempty: 1823 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 905964 shallow: 0 total: 905964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 593128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1545e00 shallow: 0 size: 296564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74140 entries, memory: 884.7 KB + pending tuples: 0 max pending: 0 zombies: 158 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00913 sec ] + [ GrB_select (wait:A 158 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.69752e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46054 +bucket 2: 27452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73507 + vlen: 2003 nvec_nonempty: 1660 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 898356 shallow: 0 total: 898356 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 588056 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1586900 shallow: 0 size: 294028 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73506 entries, memory: 877.3 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.68227e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45856 +bucket 2: 27442 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73299 + vlen: 2003 nvec_nonempty: 1647 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 895860 shallow: 0 total: 895860 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 586392 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153d700 shallow: 0 size: 293196 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73298 entries, memory: 874.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (hyper to sparse) + 0.00778 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67525e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45770 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73203 + vlen: 2003 nvec_nonempty: 1641 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 894708 shallow: 0 total: 894708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ada00 shallow: 0 size: 292812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73202 entries, memory: 873.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00855 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67306e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73173 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 894348 shallow: 0 total: 894348 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585384 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ad900 shallow: 0 size: 292692 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73172 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00923 sec ] + [ GrB_select (hyper to sparse) + 0.00768 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ad900 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00932 sec ] + [ GrB_select (hyper to sparse) + 0.00775 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000153 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92388e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48960 +bucket 2: 27568 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76529 + vlen: 2003 nvec_nonempty: 1936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 934620 shallow: 0 total: 934620 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612232 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc159eb00 shallow: 0 size: 306116 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76528 entries, memory: 912.7 KB + pending tuples: 0 max pending: 0 zombies: 350 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 350 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0168 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.66168e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45746 +bucket 2: 27270 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73017 + vlen: 2003 nvec_nonempty: 1771 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 892476 shallow: 0 total: 892476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 584136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1542100 shallow: 0 size: 292068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73016 entries, memory: 871.6 KB + pending tuples: 0 max pending: 0 zombies: 132 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (wait:A 132 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0167 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.61855e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45198 +bucket 2: 27224 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72423 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 885348 shallow: 0 total: 885348 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 579384 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1581400 shallow: 0 size: 289692 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72422 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00922 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0154 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.61016e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45116 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72307 + vlen: 2003 nvec_nonempty: 1614 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 883956 shallow: 0 total: 883956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1539600 shallow: 0 size: 289228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72306 entries, memory: 863.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00842 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14abe00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00923 sec ] + [ GrB_select (hyper to sparse) + 0.00765 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000143 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0182 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.79303e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47624 +bucket 2: 27172 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74797 + vlen: 2003 nvec_nonempty: 1921 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 913836 shallow: 0 total: 913836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 598376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1599a00 shallow: 0 size: 299188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74796 entries, memory: 892.4 KB + pending tuples: 0 max pending: 0 zombies: 142 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00944 sec ] + [ GrB_select (wait:A 142 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.48746e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43754 +bucket 2: 26832 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70587 + vlen: 2003 nvec_nonempty: 1634 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 863316 shallow: 0 total: 863316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 564696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153a500 shallow: 0 size: 282348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70586 entries, memory: 843.1 KB + pending tuples: 0 max pending: 0 zombies: 96 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (wait:A 96 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.42123e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43058 +bucket 2: 26582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69641 + vlen: 2003 nvec_nonempty: 1565 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 851964 shallow: 0 total: 851964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 557128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1574700 shallow: 0 size: 278564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69640 entries, memory: 832.0 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0149 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39862e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42842 +bucket 2: 26472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69315 + vlen: 2003 nvec_nonempty: 1553 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 848052 shallow: 0 total: 848052 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 554520 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152e200 shallow: 0 size: 277260 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69314 entries, memory: 828.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1570800 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00946 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0155 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152c600 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00819 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a5500 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (hyper to sparse) + 0.00742 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00177 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000151 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.65381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46018 +bucket 2: 26890 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72909 + vlen: 2003 nvec_nonempty: 1883 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 891180 shallow: 0 total: 891180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 583272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594200 shallow: 0 size: 291636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72908 entries, memory: 870.3 KB + pending tuples: 0 max pending: 0 zombies: 198 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 18 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 17 + row 167: 22 + row 173: 8 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 13 + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (wait:A 198 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.29674e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41694 +bucket 2: 26132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67827 + vlen: 2003 nvec_nonempty: 1598 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 830196 shallow: 0 total: 830196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 542616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1531300 shallow: 0 size: 271308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67826 entries, memory: 810.7 KB + pending tuples: 0 max pending: 0 zombies: 102 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (wait:A 102 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0153 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.22885e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41008 +bucket 2: 25808 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66817 + vlen: 2003 nvec_nonempty: 1496 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 818076 shallow: 0 total: 818076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 534536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1566b00 shallow: 0 size: 267268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66816 entries, memory: 798.9 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0153 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21872e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40988 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66665 + vlen: 2003 nvec_nonempty: 1492 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 816252 shallow: 0 total: 816252 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533320 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1523600 shallow: 0 size: 266660 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66664 entries, memory: 797.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00926 sec ] + [ GrB_select (hyper to sparse) + 0.00796 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21619e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40970 +bucket 2: 25656 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66627 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815796 shallow: 0 total: 815796 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533016 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0d00 shallow: 0 size: 266508 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66626 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00922 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0c00 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0c00 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (hyper to sparse) + 0.00719 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0b00 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00873 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0a00 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00932 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0900 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0800 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0600 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00926 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0500 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0400 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0300 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0300 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00176 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000168 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.28105e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41598 +bucket 2: 25996 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67595 + vlen: 2003 nvec_nonempty: 1872 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 827412 shallow: 0 total: 827412 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 540760 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1584900 shallow: 0 size: 270380 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67594 entries, memory: 808.0 KB + pending tuples: 0 max pending: 0 zombies: 392 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00896 sec ] + [ GrB_select (wait:A 392 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0141 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.83985e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35884 +bucket 2: 24822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60707 + vlen: 2003 nvec_nonempty: 1405 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 744756 shallow: 0 total: 744756 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 485656 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1518900 shallow: 0 size: 242828 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60706 entries, memory: 727.3 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00871 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0144 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77961e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35146 +bucket 2: 24558 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59705 + vlen: 2003 nvec_nonempty: 1187 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 732732 shallow: 0 total: 732732 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 477640 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1544100 shallow: 0 size: 238820 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59704 entries, memory: 715.6 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00854 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.76106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34938 +bucket 2: 24454 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59393 + vlen: 2003 nvec_nonempty: 1179 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 728988 shallow: 0 total: 728988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 475144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1541e00 shallow: 0 size: 237572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59392 entries, memory: 711.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (hyper to sparse) + 0.00735 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75147e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34870 +bucket 2: 24360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59231 + vlen: 2003 nvec_nonempty: 1171 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 727044 shallow: 0 total: 727044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1540a00 shallow: 0 size: 236924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59230 entries, memory: 710.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.00656 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153fd00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00853 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00211 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000139 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0161 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2382e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41186 +bucket 2: 25770 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66957 + vlen: 2003 nvec_nonempty: 1842 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 819756 shallow: 0 total: 819756 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 535656 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1582b00 shallow: 0 size: 267828 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66956 entries, memory: 800.5 KB + pending tuples: 0 max pending: 0 zombies: 366 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (wait:A 366 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0147 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.79826e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35436 +bucket 2: 24580 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60017 + vlen: 2003 nvec_nonempty: 1381 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 736476 shallow: 0 total: 736476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 480136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1515f00 shallow: 0 size: 240068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60016 entries, memory: 719.2 KB + pending tuples: 0 max pending: 0 zombies: 44 + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (wait:A 44 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0143 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72003e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34440 +bucket 2: 24256 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58697 + vlen: 2003 nvec_nonempty: 1180 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 720636 shallow: 0 total: 720636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 469576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153fe00 shallow: 0 size: 234788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58696 entries, memory: 703.7 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00854 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6962e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34220 +bucket 2: 24068 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58289 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 715740 shallow: 0 total: 715740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 466312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 233156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58288 entries, memory: 699.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.0072 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34064 +bucket 2: 23954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58019 + vlen: 2003 nvec_nonempty: 1162 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 712500 shallow: 0 total: 712500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 464152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490000 shallow: 0 size: 232076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58018 entries, memory: 695.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.0084 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67231e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34000 +bucket 2: 23876 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57877 + vlen: 2003 nvec_nonempty: 1157 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 710796 shallow: 0 total: 710796 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 463016 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148fb00 shallow: 0 size: 231508 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57876 entries, memory: 694.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00847 sec ] + [ GrB_select (hyper to sparse) + 0.00641 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66838e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33952 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57809 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709980 shallow: 0 total: 709980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f900 shallow: 0 size: 231236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57808 entries, memory: 693.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6655e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33932 +bucket 2: 23826 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57759 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709380 shallow: 0 total: 709380 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462072 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 231036 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57758 entries, memory: 692.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66388e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33928 +bucket 2: 23802 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57731 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709044 shallow: 0 total: 709044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 230924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57730 entries, memory: 692.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.0082 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66261e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33910 +bucket 2: 23798 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57709 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708780 shallow: 0 total: 708780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57708 entries, memory: 692.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00639 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66215e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33922 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57701 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708684 shallow: 0 total: 708684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57700 entries, memory: 692.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00841 sec ] + [ GrB_select (hyper to sparse) + 0.00637 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00842 sec ] + [ GrB_select (hyper to sparse) + 0.00636 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00206 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.67e-05 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000144 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0168 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.15672e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40338 +bucket 2: 25388 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 65727 + vlen: 2003 nvec_nonempty: 1822 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 804996 shallow: 0 total: 804996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 525816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc157f000 shallow: 0 size: 262908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 65726 entries, memory: 786.1 KB + pending tuples: 0 max pending: 0 zombies: 362 + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 10 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 10 + row 9: 19 + row 10: 19 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 14 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 10 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (wait:A 362 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0142 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71534e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34398 +bucket 2: 24218 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58617 + vlen: 2003 nvec_nonempty: 1305 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 719676 shallow: 0 total: 719676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 468936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1510d00 shallow: 0 size: 234468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58616 entries, memory: 702.8 KB + pending tuples: 0 max pending: 0 zombies: 20 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0085 sec ] + [ GrB_select (wait:A 20 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6248e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33306 +bucket 2: 23742 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57049 + vlen: 2003 nvec_nonempty: 1155 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 700860 shallow: 0 total: 700860 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 456392 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1538500 shallow: 0 size: 228196 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57048 entries, memory: 684.4 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00829 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.5773e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32858 +bucket 2: 23350 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56209 + vlen: 2003 nvec_nonempty: 1134 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 690780 shallow: 0 total: 690780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 449672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fbe00 shallow: 0 size: 224836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56208 entries, memory: 674.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00812 sec ] + [ GrB_select (hyper to sparse) + 0.00673 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54837e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32788 +bucket 2: 22902 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55691 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 684564 shallow: 0 total: 684564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 445528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b700 shallow: 0 size: 222764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55690 entries, memory: 668.5 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.0062 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53904e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32660 +bucket 2: 22862 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55523 + vlen: 2003 nvec_nonempty: 1107 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 682548 shallow: 0 total: 682548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 444184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b200 shallow: 0 size: 222092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55522 entries, memory: 666.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00694 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00813 sec ] + [ GrB_select (hyper to sparse) + 0.00619 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00196 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.2e-05 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000134 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.05966e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 39348 +bucket 2: 24882 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 64231 + vlen: 2003 nvec_nonempty: 1805 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 787044 shallow: 0 total: 787044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 513848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc157ab00 shallow: 0 size: 256924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 64230 entries, memory: 768.6 KB + pending tuples: 0 max pending: 0 zombies: 414 + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 6 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 9 + row 9: 19 + row 10: 19 + row 11: 18 + row 156: 17 + row 157: 17 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 22 entries [21:42] + row 0: 20 + row 2: 6 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 9 + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (wait:A 414 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.58371e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32850 +bucket 2: 23472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56323 + vlen: 2003 nvec_nonempty: 1164 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 692148 shallow: 0 total: 692148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 450584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1509600 shallow: 0 size: 225292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56322 entries, memory: 675.9 KB + pending tuples: 0 max pending: 0 zombies: 8 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00832 sec ] + [ GrB_select (wait:A 8 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0128 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.48377e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32068 +bucket 2: 22448 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54517 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 670476 shallow: 0 total: 670476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 436136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152c600 shallow: 0 size: 218068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54516 entries, memory: 654.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00682 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.43283e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31686 +bucket 2: 21886 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53573 + vlen: 2003 nvec_nonempty: 1090 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 659148 shallow: 0 total: 659148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 428584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1528000 shallow: 0 size: 214292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53572 entries, memory: 643.7 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41661e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31630 +bucket 2: 21638 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53269 + vlen: 2003 nvec_nonempty: 1078 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 655500 shallow: 0 total: 655500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 426152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1524100 shallow: 0 size: 213076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53268 entries, memory: 640.1 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00783 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1522c00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00778 sec ] + [ GrB_select (hyper to sparse) + 0.006 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0019 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.77e-05 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.92656e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 37642 +bucket 2: 24478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 62121 + vlen: 2003 nvec_nonempty: 1774 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 761724 shallow: 0 total: 761724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 496968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1574800 shallow: 0 size: 248484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 62120 entries, memory: 743.9 KB + pending tuples: 0 max pending: 0 zombies: 426 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 13 + row 163: 12 + row 165: 8 + row 166: 10 + row 167: 18 + column: 1 : 21 entries [19:39] + row 0: 18 + row 2: 1 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + ... + Pending (nil) + + 0.00872 sec ] + [ GrB_select (wait:A 426 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45722e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31526 +bucket 2: 22500 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54027 + vlen: 2003 nvec_nonempty: 1135 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 664596 shallow: 0 total: 664596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 432216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1500b00 shallow: 0 size: 216108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54026 entries, memory: 649.0 KB + pending tuples: 0 max pending: 0 zombies: 20 + + column: 0 : 14 entries [0:13] + row 1: 13 + row 3: 11 + row 4: 13 + row 5: 13 + row 6: 13 + row 7: 13 + row 9: 13 + row 10: 13 + row 156: 13 + row 157: 13 + row 159: 12 + row 160: 12 + row 161: 13 + row 167: 13 + column: 1 : 14 entries [14:27] + row 0: 13 + row 3: 11 + row 4: 13 + row 5: 13 + row 6: 13 + row 7: 13 + row 9: 13 + row 10: 13 + row 156: 13 + row 157: 13 + row 159: 12 + row 160: 12 + row 161: 13 + row 167: 13 + column: 2 : 0 entries [28:27] + column: 3 : 12 entries [28:39] + row 0: 11 + ... + Pending (nil) + + 0.00814 sec ] + [ GrB_select (wait:A 20 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0127 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.33568e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30944 +bucket 2: 20780 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51725 + vlen: 2003 nvec_nonempty: 1075 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 636972 shallow: 0 total: 636972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 413800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc151fc00 shallow: 0 size: 206900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51724 entries, memory: 622.0 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00768 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0116 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.27432e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30334 +bucket 2: 20188 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50523 + vlen: 2003 nvec_nonempty: 1026 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 622548 shallow: 0 total: 622548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 404184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14e6600 shallow: 0 size: 202092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50522 entries, memory: 608.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00765 sec ] + [ GrB_select (hyper to sparse) + 0.00646 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25143e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30100 +bucket 2: 19966 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50067 + vlen: 2003 nvec_nonempty: 1012 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 617076 shallow: 0 total: 617076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 400536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480700 shallow: 0 size: 200268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50066 entries, memory: 602.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0076 sec ] + [ GrB_select (hyper to sparse) + 0.00641 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24254e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29946 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49889 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 614940 shallow: 0 total: 614940 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 399112 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480200 shallow: 0 size: 199556 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49888 entries, memory: 600.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0076 sec ] + [ GrB_select (hyper to sparse) + 0.00572 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00753 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00755 sec ] + [ GrB_select (hyper to sparse) + 0.00572 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00187 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.12e-05 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000149 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.82328e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36250 +bucket 2: 24182 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60433 + vlen: 2003 nvec_nonempty: 1754 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 741468 shallow: 0 total: 741468 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 483464 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc156f900 shallow: 0 size: 241732 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60432 entries, memory: 724.1 KB + pending tuples: 0 max pending: 0 zombies: 428 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 13 + row 163: 12 + row 165: 8 + row 166: 10 + row 167: 18 + column: 1 : 21 entries [19:39] + row 0: 18 + row 2: 1 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + ... + Pending (nil) + + 0.00864 sec ] + [ GrB_select (wait:A 428 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.33754e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 21174 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51761 + vlen: 2003 nvec_nonempty: 1108 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 637404 shallow: 0 total: 637404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 414088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14c6700 shallow: 0 size: 207044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51760 entries, memory: 622.5 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 14 entries [0:13] + row 1: 13 + row 3: 10 + row 4: 11 + row 5: 6 + row 6: 13 + row 7: 13 + row 9: 12 + row 10: 12 + row 156: 12 + row 157: 12 + row 159: 10 + row 160: 10 + row 161: 13 + row 167: 13 + column: 1 : 14 entries [14:27] + row 0: 13 + row 3: 10 + row 4: 11 + row 5: 6 + row 6: 13 + row 7: 13 + row 9: 12 + row 10: 12 + row 156: 12 + row 157: 12 + row 159: 10 + row 160: 10 + row 161: 13 + row 167: 13 + column: 2 : 0 entries [28:27] + column: 3 : 11 entries [28:38] + row 0: 10 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0124 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.19303e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29410 +bucket 2: 19474 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48885 + vlen: 2003 nvec_nonempty: 999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 602892 shallow: 0 total: 602892 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 391080 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147e200 shallow: 0 size: 195540 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48884 entries, memory: 588.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00749 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0116 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13995e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28554 +bucket 2: 19230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47785 + vlen: 2003 nvec_nonempty: 954 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 589692 shallow: 0 total: 589692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 382280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150a300 shallow: 0 size: 191140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47784 entries, memory: 575.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00744 sec ] + [ GrB_select (hyper to sparse) + 0.00552 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13004e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28380 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47577 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 587196 shallow: 0 total: 587196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 380616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1507e00 shallow: 0 size: 190308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47576 entries, memory: 573.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00745 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1506e00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00743 sec ] + [ GrB_select (hyper to sparse) + 0.00555 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.27e-05 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00025 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55906e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32398 +bucket 2: 23484 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55883 + vlen: 2003 nvec_nonempty: 1562 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 686868 shallow: 0 total: 686868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 447064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1562400 shallow: 0 size: 223532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55882 entries, memory: 670.8 KB + pending tuples: 0 max pending: 0 zombies: 290 + + column: 0 : 18 entries [0:17] + row 1: 16 + row 3: 13 + row 4: 14 + row 5: 12 + row 6: 16 + row 7: 16 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + row 157: 15 + row 159: 12 + row 160: 12 + row 161: 17 + row 162: 10 + row 163: 9 + row 166: 6 + row 167: 17 + column: 1 : 20 entries [18:37] + row 0: 16 + row 2: 1 + row 3: 13 + row 4: 13 + row 5: 11 + row 6: 16 + row 7: 16 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + ... + Pending (nil) + + 0.0083 sec ] + [ GrB_select (wait:A 290 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0127 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.17494e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28792 +bucket 2: 19720 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48513 + vlen: 2003 nvec_nonempty: 1059 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 598428 shallow: 0 total: 598428 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 388104 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14baa00 shallow: 0 size: 194052 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48512 entries, memory: 584.4 KB + pending tuples: 0 max pending: 0 zombies: 12 + + column: 0 : 2 entries [0:1] + row 161: 1 + row 167: 1 + column: 1 : 2 entries [2:3] + row 161: 1 + row 167: 1 + column: 2 : 0 entries [4:3] + column: 3 : 0 entries [4:3] + column: 4 : 0 entries [4:3] + column: 5 : 0 entries [4:3] + column: 6 : 18 entries [4:21] + row 7: 17 + row 9: 16 + row 10: 16 + row 11: 16 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 13 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [22:39] + row 6: 17 + row 9: 16 + row 10: 16 + row 11: 16 + row 12: 17 + row 13: 17 + row 15: 17 + ... + Pending (nil) + + 0.00767 sec ] + [ GrB_select (wait:A 12 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0114 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02523e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26864 +bucket 2: 18452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45317 + vlen: 2003 nvec_nonempty: 880 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 560076 shallow: 0 total: 560076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 362536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1477300 shallow: 0 size: 181268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45316 entries, memory: 546.9 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 13 entries [0:12] + row 7: 12 + row 12: 12 + row 13: 12 + row 15: 12 + row 16: 12 + row 17: 12 + row 150: 12 + row 151: 12 + row 155: 12 + row 156: 12 + row 159: 12 + row 160: 12 + row 161: 12 + column: 7 : 13 entries [13:25] + row 6: 12 + row 12: 12 + row 13: 12 + row 15: 12 + row 16: 12 + row 17: 12 + row 150: 12 + row 151: 12 + row 155: 12 + row 156: 12 + row 159: 12 + row 160: 12 + row 161: 12 + column: 8 : 0 entries [26:25] + column: 9 : 0 entries [26:25] + column: 10 : 0 entries [26:25] + ... + Pending (nil) + + 0.00716 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0106 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00291e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26452 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44821 + vlen: 2003 nvec_nonempty: 835 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 554124 shallow: 0 total: 554124 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358568 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fa900 shallow: 0 size: 179284 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44820 entries, memory: 541.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (hyper to sparse) + 0.0053 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14f9800 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00711 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00166 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.14e-05 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000136 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.49359e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 23110 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54697 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 672636 shallow: 0 total: 672636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 437576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155ec00 shallow: 0 size: 218788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54696 entries, memory: 656.9 KB + pending tuples: 0 max pending: 0 zombies: 274 + + column: 0 : 18 entries [0:17] + row 1: 15 + row 3: 12 + row 4: 14 + row 5: 12 + row 6: 15 + row 7: 15 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + row 157: 15 + row 159: 12 + row 160: 12 + row 161: 17 + row 162: 7 + row 163: 8 + row 166: 6 + row 167: 17 + column: 1 : 17 entries [18:34] + row 0: 15 + row 2: 1 + row 3: 12 + row 4: 12 + row 5: 10 + row 6: 15 + row 7: 15 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + ... + Pending (nil) + + 0.00821 sec ] + [ GrB_select (wait:A 274 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0125 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.0822e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28032 +bucket 2: 18526 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46559 + vlen: 2003 nvec_nonempty: 1023 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 574980 shallow: 0 total: 574980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 372472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14e3e00 shallow: 0 size: 186236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46558 entries, memory: 561.5 KB + pending tuples: 0 max pending: 0 zombies: 28 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 15 entries [0:14] + row 7: 14 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 9 + row 13: 9 + row 15: 9 + row 16: 9 + row 150: 11 + row 155: 11 + row 156: 10 + row 157: 10 + row 159: 14 + row 160: 14 + row 161: 14 + column: 7 : 15 entries [15:29] + row 6: 14 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 9 + row 13: 9 + row 15: 9 + row 16: 9 + row 150: 11 + row 155: 11 + row 156: 10 + row 157: 10 + row 159: 14 + row 160: 14 + ... + Pending (nil) + + 0.00736 sec ] + [ GrB_select (wait:A 28 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0122 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:990598 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26420 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44545 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550812 shallow: 0 total: 550812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 356360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fc100 shallow: 0 size: 178180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44544 entries, memory: 537.9 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00705 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0111 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cc800 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00705 sec ] + [ GrB_select (hyper to sparse) + 0.006 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00169 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.96e-05 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000132 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0151 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42226e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 22788 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53375 + vlen: 2003 nvec_nonempty: 1509 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 656772 shallow: 0 total: 656772 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 427000 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486e00 shallow: 0 size: 213500 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53374 entries, memory: 641.4 KB + pending tuples: 0 max pending: 0 zombies: 244 + + column: 0 : 13 entries [0:12] + row 1: 8 + row 3: 6 + row 4: 10 + row 5: 4 + row 6: 8 + row 7: 8 + row 9: 8 + row 10: 8 + row 156: 8 + row 157: 8 + row 162: 5 + row 166: 3 + row 167: 6 + column: 1 : 10 entries [13:22] + row 0: 8 + row 3: 6 + row 4: 6 + row 6: 8 + row 7: 8 + row 9: 8 + row 10: 8 + row 156: 7 + row 157: 7 + row 163: 2 + column: 2 : 0 entries [23:22] + column: 3 : 7 entries [23:29] + row 0: 6 + row 1: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (wait:A 244 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0122 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02089e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26828 +bucket 2: 18392 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45221 + vlen: 2003 nvec_nonempty: 954 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 558924 shallow: 0 total: 558924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 361768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150b000 shallow: 0 size: 180884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45220 entries, memory: 545.8 KB + pending tuples: 0 max pending: 0 zombies: 26 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 3 entries [0:2] + row 7: 2 + row 9: 2 + row 10: 2 + column: 7 : 3 entries [3:5] + row 6: 2 + row 9: 2 + row 10: 2 + column: 8 : 0 entries [6:5] + column: 9 : 3 entries [6:8] + row 6: 2 + row 7: 2 + row 10: 2 + column: 10 : 3 entries [9:11] + row 6: 2 + row 7: 2 + row 9: 2 + ... + Pending (nil) + + 0.00711 sec ] + [ GrB_select (wait:A 26 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0109 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cdd00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (hyper to sparse) + 0.00603 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00173 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.12e-05 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.377e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30114 +bucket 2: 22404 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52519 + vlen: 2003 nvec_nonempty: 1497 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 646500 shallow: 0 total: 646500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 420152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1485400 shallow: 0 size: 210076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52518 entries, memory: 631.3 KB + pending tuples: 0 max pending: 0 zombies: 234 + + column: 0 : 11 entries [0:10] + row 1: 6 + row 3: 6 + row 4: 10 + row 5: 4 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + row 162: 3 + row 166: 3 + row 167: 4 + column: 1 : 8 entries [11:18] + row 0: 6 + row 3: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + row 163: zombie + column: 2 : 0 entries [19:18] + column: 3 : 7 entries [19:25] + row 0: 6 + row 1: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + column: 4 : 11 entries [26:36] + row 0: 10 + row 1: 6 + row 3: 6 + ... + Pending (nil) + + 0.00781 sec ] + [ GrB_select (wait:A 234 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0121 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00479e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26616 +bucket 2: 18246 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44863 + vlen: 2003 nvec_nonempty: 917 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 554628 shallow: 0 total: 554628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1508300 shallow: 0 size: 179452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44862 entries, memory: 541.6 KB + pending tuples: 0 max pending: 0 zombies: 24 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 1 entries [0:0] + row 7: zombie + column: 7 : 1 entries [1:1] + row 6: zombie + column: 8 : 0 entries [2:1] + column: 9 : 0 entries [2:1] + column: 10 : 0 entries [2:1] + ... + Pending (nil) + + 0.00708 sec ] + [ GrB_select (wait:A 24 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0113 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cd100 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00704 sec ] + [ GrB_select (hyper to sparse) + 0.006 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00171 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.17e-05 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000134 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.31715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29374 +bucket 2: 21990 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51365 + vlen: 2003 nvec_nonempty: 1453 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 632652 shallow: 0 total: 632652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 410920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1483000 shallow: 0 size: 205460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51364 entries, memory: 617.8 KB + pending tuples: 0 max pending: 0 zombies: 200 + + column: 0 : 8 entries [0:7] + row 1: 2 + row 4: 3 + row 5: 2 + row 6: 2 + row 7: 2 + row 162: 2 + row 166: 1 + row 167: 4 + column: 1 : 5 entries [8:12] + row 0: 2 + row 3: zombie + row 6: 2 + row 7: 2 + row 163: zombie + column: 2 : 0 entries [13:12] + column: 3 : 1 entries [13:13] + row 1: zombie + column: 4 : 4 entries [14:17] + row 0: 3 + row 5: 2 + row 162: 2 + row 167: 3 + column: 5 : 3 entries [18:20] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 16 entries [21:36] + row 0: 2 + row 1: 2 + row 7: 15 + row 8: 3 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 4 + ... + Pending (nil) + + 0.00773 sec ] + [ GrB_select (wait:A 200 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0119 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:977035 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26282 +bucket 2: 17956 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44239 + vlen: 2003 nvec_nonempty: 869 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 547140 shallow: 0 total: 547140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 353912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1504400 shallow: 0 size: 176956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44238 entries, memory: 534.3 KB + pending tuples: 0 max pending: 0 zombies: 8 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00699 sec ] + [ GrB_select (wait:A 8 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0103 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:938723 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25688 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43363 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 536628 shallow: 0 total: 536628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 346904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14c9d00 shallow: 0 size: 173452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43362 entries, memory: 524.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00692 sec ] + [ GrB_select (hyper to sparse) + 0.00586 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472b00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00688 sec ] + [ GrB_select (hyper to sparse) + 0.00577 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472200 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00684 sec ] + [ GrB_select (hyper to sparse) + 0.00559 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472200 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00679 sec ] + [ GrB_select (hyper to sparse) + 0.0051 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472100 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00508 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00162 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.03e-05 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000127 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23111e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29440 +bucket 2: 20218 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49659 + vlen: 2003 nvec_nonempty: 1421 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612180 shallow: 0 total: 612180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fa00 shallow: 0 size: 198636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49658 entries, memory: 597.8 KB + pending tuples: 0 max pending: 0 zombies: 180 + + column: 0 : 8 entries [0:7] + row 1: 2 + row 4: 3 + row 5: 2 + row 6: 2 + row 7: 2 + row 162: 2 + row 166: 1 + row 167: 4 + column: 1 : 4 entries [8:11] + row 0: 2 + row 6: 2 + row 7: 2 + row 163: zombie + column: 2 : 0 entries [12:11] + column: 3 : 0 entries [12:11] + column: 4 : 4 entries [12:15] + row 0: 3 + row 5: 2 + row 162: 2 + row 167: 3 + column: 5 : 3 entries [16:18] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 15 entries [19:33] + row 0: 2 + row 1: 2 + row 7: 14 + row 8: 1 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 3 + row 13: 3 + row 150: 8 + ... + Pending (nil) + + 0.00767 sec ] + [ GrB_select (wait:A 180 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0113 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:751128 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24834 +bucket 2: 13954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 38789 + vlen: 2003 nvec_nonempty: 831 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 481740 shallow: 0 total: 481740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 310312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14f1100 shallow: 0 size: 155156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 38788 entries, memory: 470.4 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00653 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0108 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:636932 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23340 +bucket 2: 12378 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35719 + vlen: 2003 nvec_nonempty: 730 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 444900 shallow: 0 total: 444900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 285752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14b0300 shallow: 0 size: 142876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35718 entries, memory: 434.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00635 sec ] + [ GrB_select (hyper to sparse) + 0.0052 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:620984 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23360 +bucket 2: 11908 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35269 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 439500 shallow: 0 total: 439500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 282152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463900 shallow: 0 size: 141076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35268 entries, memory: 429.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00638 sec ] + [ GrB_select (hyper to sparse) + 0.00443 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612911 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23636 +bucket 2: 11402 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35039 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436740 shallow: 0 total: 436740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 280312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463100 shallow: 0 size: 140156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35038 entries, memory: 426.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00628 sec ] + [ GrB_select (hyper to sparse) + 0.00516 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611653 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23678 +bucket 2: 11324 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35003 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436308 shallow: 0 total: 436308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 280024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463000 shallow: 0 size: 140012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35002 entries, memory: 426.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00646 sec ] + [ GrB_select (hyper to sparse) + 0.0044 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463000 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00628 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1462f00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00629 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00146 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.87e-05 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000122 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:863070 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29724 +bucket 2: 11854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 41579 + vlen: 2003 nvec_nonempty: 1368 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 515220 shallow: 0 total: 515220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 332632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146fe00 shallow: 0 size: 166316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 41578 entries, memory: 503.1 KB + pending tuples: 0 max pending: 0 zombies: 128 + + column: 0 : 4 entries [0:3] + row 4: 2 + row 5: 2 + row 162: 1 + row 167: 3 + column: 1 : 0 entries [4:3] + column: 2 : 0 entries [4:3] + column: 3 : 0 entries [4:3] + column: 4 : 3 entries [4:6] + row 0: 2 + row 5: 2 + row 167: 2 + column: 5 : 3 entries [7:9] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 9 entries [10:18] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [19:27] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [28:27] + column: 9 : 9 entries [28:36] + row 6: 8 + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (wait:A 128 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00972 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:367034 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26862 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 27115 + vlen: 2003 nvec_nonempty: 713 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 341652 shallow: 0 total: 341652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 216920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1453a00 shallow: 0 size: 108460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 27114 entries, memory: 333.6 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00544 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00733 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484b00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00425 sec ] + [ GrB_select (hyper to sparse) + 0.00432 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00127 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000197 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.57e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0142 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825705 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29092 +bucket 2: 11576 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40669 + vlen: 2003 nvec_nonempty: 1347 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 504300 shallow: 0 total: 504300 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 325352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146e100 shallow: 0 size: 162676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40668 entries, memory: 492.5 KB + pending tuples: 0 max pending: 0 zombies: 98 + + column: 0 : 3 entries [0:2] + row 4: 2 + row 5: 2 + row 167: 2 + column: 1 : 0 entries [3:2] + column: 2 : 0 entries [3:2] + column: 3 : 0 entries [3:2] + column: 4 : 3 entries [3:5] + row 0: 2 + row 5: 2 + row 167: 2 + column: 5 : 3 entries [6:8] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 9 entries [9:17] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [18:26] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [27:26] + column: 9 : 9 entries [27:35] + row 6: 8 + row 7: 8 + ... + Pending (nil) + + 0.00681 sec ] + [ GrB_select (wait:A 98 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00925 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:366168 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26830 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 27083 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 341268 shallow: 0 total: 341268 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 216664 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14bd500 shallow: 0 size: 108332 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 27082 entries, memory: 333.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00547 sec ] + [ GrB_select (hyper to sparse) + 0.00359 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc144fb00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00424 sec ] + [ GrB_select (hyper to sparse) + 0.0043 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00122 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.52e-05 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.72e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:625854 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28470 +bucket 2: 6936 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35407 + vlen: 2003 nvec_nonempty: 1290 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 441156 shallow: 0 total: 441156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 283256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463d00 shallow: 0 size: 141628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35406 entries, memory: 430.8 KB + pending tuples: 0 max pending: 0 zombies: 86 + + column: 0 : 1 entries [0:0] + row 4: zombie + column: 1 : 0 entries [1:0] + column: 2 : 0 entries [1:0] + column: 3 : 0 entries [1:0] + column: 4 : 1 entries [1:1] + row 0: zombie + column: 5 : 0 entries [2:1] + column: 6 : 9 entries [2:10] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [11:19] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [20:19] + column: 9 : 9 entries [20:28] + row 6: 8 + row 7: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 10 : 9 entries [29:37] + ... + Pending (nil) + + 0.00644 sec ] + [ GrB_select (wait:A 86 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0083 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:152477 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17224 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17477 + vlen: 2003 nvec_nonempty: 601 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 225996 shallow: 0 total: 225996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a9e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 139816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17476 entries, memory: 220.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00463 sec ] + [ GrB_select (hyper to sparse) + 0.00259 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106712 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14620 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14621 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 191724 shallow: 0 total: 191724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 116968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 58484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14620 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00323 sec ] + [ GrB_select (hyper to sparse) + 0.00332 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00351 sec ] + [ GrB_select (hyper to sparse) + 0.0025 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000945 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.44e-05 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.13e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0105 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:585584 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28216 +bucket 2: 6032 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34249 + vlen: 2003 nvec_nonempty: 1133 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 427260 shallow: 0 total: 427260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 273992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1461900 shallow: 0 size: 136996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34248 entries, memory: 417.2 KB + pending tuples: 0 max pending: 0 zombies: 68 + + column: 0 : 1 entries [0:0] + row 4: zombie + column: 1 : 0 entries [1:0] + column: 2 : 0 entries [1:0] + column: 3 : 0 entries [1:0] + column: 4 : 1 entries [1:1] + row 0: zombie + column: 5 : 0 entries [2:1] + column: 6 : 9 entries [2:10] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [11:19] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [20:19] + column: 9 : 9 entries [20:28] + row 6: 8 + row 7: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 10 : 9 entries [29:37] + ... + Pending (nil) + + 0.00637 sec ] + [ GrB_select (wait:A 68 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00789 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:137143 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 16418 +bucket 2: 156 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 16575 + vlen: 2003 nvec_nonempty: 597 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 215172 shallow: 0 total: 215172 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 132600 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 66300 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 16574 entries, memory: 210.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00409 sec ] + [ GrB_select (hyper to sparse) + 0.00239 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:78332.8 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12526 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12527 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 166596 shallow: 0 total: 166596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 100216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 50108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12526 entries, memory: 162.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00306 sec ] + [ GrB_select (hyper to sparse) + 0.0028 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc141ea00 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00295 sec ] + [ GrB_select (hyper to sparse) + 0.00235 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000903 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.17e-05 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.38e-05 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0133 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418713 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28960 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28961 + vlen: 2003 nvec_nonempty: 1107 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 363804 shallow: 0 total: 363804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 231688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1457400 shallow: 0 size: 115844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28960 entries, memory: 355.3 KB + pending tuples: 0 max pending: 0 zombies: 64 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 9 entries [0:8] + row 7: 8 + row 9: 6 + row 10: 6 + row 11: 4 + row 156: 7 + row 157: 7 + row 159: 5 + row 160: 5 + row 161: 8 + column: 7 : 9 entries [9:17] + row 6: 8 + row 9: 6 + row 10: 6 + row 11: 4 + row 156: 7 + row 157: 7 + row 159: 5 + row 160: 5 + row 161: 8 + column: 8 : 0 entries [18:17] + column: 9 : 7 entries [18:24] + row 6: 6 + row 7: 6 + row 10: 6 + row 11: 4 + row 156: 5 + row 157: 5 + row 161: 6 + column: 10 : 7 entries [25:31] + row 6: 6 + row 7: 6 + row 9: 6 + row 11: 4 + ... + Pending (nil) + + 0.00455 sec ] + [ GrB_select (wait:A 64 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00677 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:14601.3 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 5408 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 5409 + vlen: 2003 nvec_nonempty: 543 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 81180 shallow: 0 total: 81180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df7200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 43272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06a3600 shallow: 0 size: 21636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 5408 entries, memory: 79.3 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00161 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) C is empty, iso 0 + + 0.00288 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 + 0.00016 sec ] + [ GrB_select C is empty, iso 0 + + 0.000376 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=H.*H) (jit: cpu load) + 0.000557 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:0 gpus:0 + 5.25e-06 sec ] + [ GrB_reduce work:0 gpus:0 + 4.45e-06 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0187 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df4700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00864 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000109 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (hyper to sparse) + 0.00871 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7900 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0698c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00968 sec ] + [ GrB_select (hyper to sparse) + 0.0086 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00975 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc069cb00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0097 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00971 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc069cb00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00976 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.38e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00981 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00972 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00974 sec ] + [ GrB_select (hyper to sparse) + 0.00843 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00847 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.59e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00846 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00961 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1556e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1968700 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0183 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1559a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.0089 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00953 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00909 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00904 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00834 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00908 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00836 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00912 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00956 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1551c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1a50300 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00946 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc154f500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c6200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1a50300 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1551900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (hyper to sparse) + 0.00891 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (hyper to sparse) + 0.00818 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00895 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00819 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00941 sec ] + [ GrB_select (hyper to sparse) + 0.00878 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00919 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1b30a00 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00917 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00909 sec ] + [ GrB_select (hyper to sparse) + 0.00786 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00905 sec ] + [ GrB_select (hyper to sparse) + 0.00862 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00903 sec ] + [ GrB_select (hyper to sparse) + 0.00784 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00905 sec ] + [ GrB_select (hyper to sparse) + 0.00859 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (hyper to sparse) + 0.00784 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1c07100 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00897 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (hyper to sparse) + 0.00782 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (hyper to sparse) + 0.00946 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00891 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00885 sec ] + [ GrB_select (hyper to sparse) + 0.00751 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00881 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00879 sec ] + [ GrB_select (hyper to sparse) + 0.0075 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00828 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000128 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0089 sec ] + [ GrB_select (hyper to sparse) + 0.00745 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0087 sec ] + [ GrB_select (hyper to sparse) + 0.00809 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0153 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00879 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00863 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.00729 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0300 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00885 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0300 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00863 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00861 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00861 sec ] + [ GrB_select (hyper to sparse) + 0.00725 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.34e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00713 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00826 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150af00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00809 sec ] + [ GrB_select (hyper to sparse) + 0.00729 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492600 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00808 sec ] + [ GrB_select (hyper to sparse) + 0.0067 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492400 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00806 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492400 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00807 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1491a00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00809 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1491000 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00651 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490900 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.008 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490300 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00798 sec ] + [ GrB_select (hyper to sparse) + 0.00648 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148fd00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00798 sec ] + [ GrB_select (hyper to sparse) + 0.00722 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f900 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00645 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f800 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00793 sec ] + [ GrB_select (hyper to sparse) + 0.00719 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00793 sec ] + [ GrB_select (hyper to sparse) + 0.00643 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00721 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (hyper to sparse) + 0.00645 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000121 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148db00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00788 sec ] + [ GrB_select (hyper to sparse) + 0.0063 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148c600 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00777 sec ] + [ GrB_select (hyper to sparse) + 0.00699 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148ba00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0078 sec ] + [ GrB_select (hyper to sparse) + 0.00626 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b400 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00698 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00623 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00784 sec ] + [ GrB_select (hyper to sparse) + 0.00699 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00788 sec ] + [ GrB_select (hyper to sparse) + 0.00624 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1489d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00774 sec ] + [ GrB_select (hyper to sparse) + 0.00689 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1488300 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00772 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1487000 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00761 sec ] + [ GrB_select (hyper to sparse) + 0.00684 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486a00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0076 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.24e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486a00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00758 sec ] + [ GrB_select (hyper to sparse) + 0.00669 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484600 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.00754 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00746 sec ] + [ GrB_select (hyper to sparse) + 0.00583 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480a00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00739 sec ] + [ GrB_select (hyper to sparse) + 0.00655 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480200 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00734 sec ] + [ GrB_select (hyper to sparse) + 0.00576 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0073 sec ] + [ GrB_select (hyper to sparse) + 0.00651 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0073 sec ] + [ GrB_select (hyper to sparse) + 0.00577 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdc00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00739 sec ] + [ GrB_select (hyper to sparse) + 0.00576 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147de00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00726 sec ] + [ GrB_select (hyper to sparse) + 0.00603 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147be00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00725 sec ] + [ GrB_select (hyper to sparse) + 0.00558 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147bb00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00723 sec ] + [ GrB_select (hyper to sparse) + 0.00636 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147ba00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00723 sec ] + [ GrB_select (hyper to sparse) + 0.00562 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147b600 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0072 sec ] + [ GrB_select (hyper to sparse) + 0.0063 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdd00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147b600 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00727 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479a00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.00713 sec ] + [ GrB_select (hyper to sparse) + 0.00545 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1477200 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.00703 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476300 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00612 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.46e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00533 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdc00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc141ea00 shallow: 0 size: 16032 + ->i: 0x7effc1422900 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479700 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00609 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc141ea00 shallow: 0 size: 16032 + ->i: 0x7effc1422900 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479700 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00701 sec ] + [ GrB_select (hyper to sparse) + 0.00533 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1422900 shallow: 0 size: 16032 + ->i: 0x7effc1426800 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147d600 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00694 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481500 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481400 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00529 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481400 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484c00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00681 sec ] + [ GrB_select (hyper to sparse) + 0.00524 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1483d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00678 sec ] + [ GrB_select (hyper to sparse) + 0.00592 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1482b00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00669 sec ] + [ GrB_select (hyper to sparse) + 0.00515 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1482700 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00669 sec ] + [ GrB_select (hyper to sparse) + 0.00527 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00553 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00665 sec ] + [ GrB_select (hyper to sparse) + 0.00514 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00668 sec ] + [ GrB_select (hyper to sparse) + 0.00589 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.05e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556a00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00671 sec ] + [ GrB_select (hyper to sparse) + 0.005 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142e600 shallow: 0 size: 16032 + ->i: 0x7effc1432500 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00656 sec ] + [ GrB_select (hyper to sparse) + 0.00542 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00627 sec ] + [ GrB_select (hyper to sparse) + 0.00455 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1508400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146fd00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00618 sec ] + [ GrB_select (hyper to sparse) + 0.0049 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fe400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146f400 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00619 sec ] + [ GrB_select (hyper to sparse) + 0.00446 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fcb00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146ee00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00613 sec ] + [ GrB_select (hyper to sparse) + 0.0052 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fbb00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ffa00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00611 sec ] + [ GrB_select (hyper to sparse) + 0.00437 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb700 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ff600 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00614 sec ] + [ GrB_select (hyper to sparse) + 0.00514 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.65e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb500 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ff400 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00613 sec ] + [ GrB_select (hyper to sparse) + 0.00397 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ff400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00586 sec ] + [ GrB_select (hyper to sparse) + 0.00378 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ff400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00458 sec ] + [ GrB_select (hyper to sparse) + 0.00364 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ab500 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00416 sec ] + [ GrB_select (hyper to sparse) + 0.0036 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000101 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14a9100 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00411 sec ] + [ GrB_select (hyper to sparse) + 0.00436 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000105 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556d00 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ad000 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00417 sec ] + [ GrB_select (hyper to sparse) + 0.0031 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb500 shallow: 0 size: 16032 + ->i: 0x7effc14ff400 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00344 sec ] + [ GrB_select (hyper to sparse) + 0.00266 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc148c700 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ad000 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00328 sec ] + [ GrB_select (hyper to sparse) + 0.00262 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1480100 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00347 sec ] + [ GrB_select (hyper to sparse) + 0.00256 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.78e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556e00 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00314 sec ] + [ GrB_select (hyper to sparse) + 0.00257 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1483e00 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1515500 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00306 sec ] + [ GrB_select (hyper to sparse) + 0.00289 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1454a00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00302 sec ] + [ GrB_select (hyper to sparse) + 0.00315 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00294 sec ] + [ GrB_select (hyper to sparse) + 0.00247 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.74e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556f00 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.003 sec ] + [ GrB_select + 0.00142 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014fa00 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc030b600 shallow: 0 size: 1624 + ->p: 0x7effc06b0700 shallow: 0 size: 1632 + ->i: 0x7effc142a700 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147ff00 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.00169 sec ] + [ GrB_select + 0.001 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06b0100 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc1557200 shallow: 0 size: 336 + ->p: 0x7effc1557000 shallow: 0 size: 344 + ->i: 0x7effc06ff400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc030b600 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.00151 sec ] + [ GrB_select C is empty, iso 0 + + 0.000452 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 + 0.000172 sec ] + [ GrB_select C is empty, iso 0 + + 0.000396 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.51e-06 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00191 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000127 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00162 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00165 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00158 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00165 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00165 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00155 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.77e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00154 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.76e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00152 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.82e-05 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00148 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00183 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.01e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00179 sec ] + [ GrB_Matrix_nvals + 2.24e-07 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00171 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.77e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00169 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.6e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00159 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.8e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00151 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.69e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00149 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.7e-05 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00147 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.41e-05 sec ] + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00151 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.33e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0014 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.48e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00143 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.54e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00127 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.62e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00101 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.55e-05 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000979 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.45e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000749 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.78e-05 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000724 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.67e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=H.*H) + 0.00025 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.19e-06 sec ] +[ OK ] +SUCCESS: All unit tests have passed. diff --git a/save_errors/o2 b/save_errors/o2 new file mode 100644 index 0000000000..685de24182 --- /dev/null +++ b/save_errors/o2 @@ -0,0 +1,9392 @@ +Test allktruss... GB_cuda_get_device_count: 4, cudaError_t: 0 + +Device: 0: memory: 17071800320 SMs: 56 compute: 6.0 +GB_cuda_init: 0 + +================================== bcsstk13.mtx: + [ GrB_Matrix_build_FP64 (cast J 1 0) (step1: 0.00223201 sec) (step2: 0.0206186 sec) (build, 1 threads) (step3: 0.000838824 sec) (step4: 0.000637904 sec) (jit: cpu load) (step5: 0.000988573 sec) (build 32/32 time: 0.0253593) (hyper to sparse) (wrapup 64/64 time: 0.0032275) (convert ints 32/32 to 64/64, time: 0.00065919) + 0.0293 sec ] + [ GxB_Vector_diag (jit: cuda load) (sparse to hyper) (sparse to full) + 0.00599 sec ] + [ GrB_Vector_nvals + 1.18e-06 sec ] +graph has 2003 self edges + [ GrB_select (jit: cuda load) + 0.00714 sec ] +now has 0 self edges + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc01fba00 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc01ff900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (jit: cuda load) (hyper to sparse) (jit: cuda load) (hyper to sparse) + 0.0186 sec ] + [ GrB_Matrix_nvals + 2.24e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024f800 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00908 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.00034 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc024b900 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc014bf00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0105 sec ] + [ GrB_select (hyper to sparse) + 0.00887 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024b900 shallow: 0 size: 16032 + ->i: 0x7effc0008200 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0148000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.09e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00932 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0393000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00977 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0057600 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0098 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000104 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00932 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00931 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00983 sec ] + [ GrB_select (hyper to sparse) + 0.00926 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00972 sec ] + [ GrB_select (hyper to sparse) + 0.00929 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000104 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00972 sec ] + [ GrB_select (hyper to sparse) + 0.00854 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc08c2f00 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038f000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.0084 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00953 sec ] + [ GrB_select (hyper to sparse) + 0.00916 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00954 sec ] + [ GrB_select (hyper to sparse) + 0.00839 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00918 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00845 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00917 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00839 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00915 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00953 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0094 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0173 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038e800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00938 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038db00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072e800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.00876 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0390f00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00825 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00824 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.00898 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00932 sec ] + [ GrB_select (hyper to sparse) + 0.00808 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00912 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0185 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0383700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0a8b200 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00908 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0382d00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00908 sec ] + [ GrB_select (hyper to sparse) + 0.00836 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00782 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00861 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (hyper to sparse) + 0.00783 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000121 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00935 sec ] + [ GrB_select (hyper to sparse) + 0.00861 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00893 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081c800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0b61900 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00897 sec ] + [ GrB_select (hyper to sparse) + 0.00781 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00891 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000212 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00894 sec ] + [ GrB_select (hyper to sparse) + 0.00774 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0167 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037cc00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00884 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037fa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00874 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0152 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00877 sec ] + [ GrB_select (hyper to sparse) + 0.00822 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (hyper to sparse) + 0.00749 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000124 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00881 sec ] + [ GrB_select (hyper to sparse) + 0.00819 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00868 sec ] + [ GrB_select (hyper to sparse) + 0.00735 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0161 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0085 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00851 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0085 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00887 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00903 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081aa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00888 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00891 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00902 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00887 sec ] + [ GrB_select (hyper to sparse) + 0.00899 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00902 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00858 sec ] + [ GrB_select (hyper to sparse) + 0.00902 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000103 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00831 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.014 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.0081 sec ] + [ GrB_select (hyper to sparse) + 0.00735 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00659 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00816 sec ] + [ GrB_select (hyper to sparse) + 0.00729 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.87e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00662 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00735 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00801 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00798 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00725 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00651 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00793 sec ] + [ GrB_select (hyper to sparse) + 0.00721 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00792 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00792 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00803 sec ] + [ GrB_select (hyper to sparse) + 0.0072 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00785 sec ] + [ GrB_select (hyper to sparse) + 0.00633 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0078 sec ] + [ GrB_select (hyper to sparse) + 0.00706 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0398000 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00675 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00791 sec ] + [ GrB_select (hyper to sparse) + 0.00631 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00787 sec ] + [ GrB_select (hyper to sparse) + 0.00708 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00787 sec ] + [ GrB_select (hyper to sparse) + 0.00632 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.8e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00787 sec ] + [ GrB_select (hyper to sparse) + 0.00699 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00772 sec ] + [ GrB_select (hyper to sparse) + 0.00624 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00767 sec ] + [ GrB_select (hyper to sparse) + 0.00686 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0736900 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00769 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00776 sec ] + [ GrB_select (hyper to sparse) + 0.00686 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.89e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00761 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.0075 sec ] + [ GrB_select (hyper to sparse) + 0.00669 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00752 sec ] + [ GrB_select (hyper to sparse) + 0.00586 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00742 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0074 sec ] + [ GrB_select (hyper to sparse) + 0.00579 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00737 sec ] + [ GrB_select (hyper to sparse) + 0.00656 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00733 sec ] + [ GrB_select (hyper to sparse) + 0.00584 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00012 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00743 sec ] + [ GrB_select (hyper to sparse) + 0.00649 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00727 sec ] + [ GrB_select (hyper to sparse) + 0.00564 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00729 sec ] + [ GrB_select (hyper to sparse) + 0.00639 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00725 sec ] + [ GrB_select (hyper to sparse) + 0.00559 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00635 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00561 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000128 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6d00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00729 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.00709 sec ] + [ GrB_select (hyper to sparse) + 0.0058 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.00703 sec ] + [ GrB_select (hyper to sparse) + 0.00613 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00536 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00688 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000122 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00692 sec ] + [ GrB_select (hyper to sparse) + 0.00535 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00608 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00012 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00691 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000124 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017e300 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00533 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00688 sec ] + [ GrB_select (hyper to sparse) + 0.00612 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000124 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00692 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00679 sec ] + [ GrB_select (hyper to sparse) + 0.00602 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00682 sec ] + [ GrB_select (hyper to sparse) + 0.00519 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00667 sec ] + [ GrB_select (hyper to sparse) + 0.00595 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00668 sec ] + [ GrB_select (hyper to sparse) + 0.00517 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00663 sec ] + [ GrB_select (hyper to sparse) + 0.00594 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00663 sec ] + [ GrB_select (hyper to sparse) + 0.00516 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00665 sec ] + [ GrB_select (hyper to sparse) + 0.0059 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000121 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7200 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0067 sec ] + [ GrB_select (hyper to sparse) + 0.00502 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00651 sec ] + [ GrB_select (hyper to sparse) + 0.00491 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00627 sec ] + [ GrB_select (hyper to sparse) + 0.00494 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00619 sec ] + [ GrB_select (hyper to sparse) + 0.00448 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0700a00 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00624 sec ] + [ GrB_select (hyper to sparse) + 0.00443 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06ffe00 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0062 sec ] + [ GrB_select (hyper to sparse) + 0.00441 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00617 sec ] + [ GrB_select (hyper to sparse) + 0.0044 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00618 sec ] + [ GrB_select (hyper to sparse) + 0.00517 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.06e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00621 sec ] + [ GrB_select (hyper to sparse) + 0.00403 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0358500 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00581 sec ] + [ GrB_select (hyper to sparse) + 0.0038 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00463 sec ] + [ GrB_select (hyper to sparse) + 0.00412 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc035a700 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00429 sec ] + [ GrB_select (hyper to sparse) + 0.00438 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.61e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0359e00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00417 sec ] + [ GrB_select (hyper to sparse) + 0.00362 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.15e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7500 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00416 sec ] + [ GrB_select (hyper to sparse) + 0.00312 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00347 sec ] + [ GrB_select (hyper to sparse) + 0.00267 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0decb00 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00318 sec ] + [ GrB_select (hyper to sparse) + 0.00293 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00316 sec ] + [ GrB_select (hyper to sparse) + 0.00346 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7600 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00319 sec ] + [ GrB_select (hyper to sparse) + 0.00264 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0191e00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00302 sec ] + [ GrB_select (hyper to sparse) + 0.00307 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dcf00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00314 sec ] + [ GrB_select (hyper to sparse) + 0.00286 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00348 sec ] + [ GrB_select (hyper to sparse) + 0.00315 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.85e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7700 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00295 sec ] + [ GrB_select + 0.00147 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014f900 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc06ffb00 shallow: 0 size: 1624 + ->p: 0x7effc06ff400 shallow: 0 size: 1632 + ->i: 0x7effc0dec900 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06d9000 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.00191 sec ] + [ GrB_select (jit: cuda load) + 0.00301 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc0dfb500 shallow: 0 size: 336 + ->p: 0x7effc0dfb300 shallow: 0 size: 344 + ->i: 0x7effc00a7400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc00a7c00 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.00165 sec ] + [ GrB_select C is empty, iso 0 + + 0.000484 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 (jit: cpu load) + 0.000268 sec ] + [ GrB_select C is empty, iso 0 + + 0.000416 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.57e-06 sec ] +all k-truss: kmax 29 + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00985 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00991 sec ] + [ GrB_select (hyper to sparse) + 0.00867 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) (jit: cpu load) + 0.00204 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000301 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000283 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00994 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0181 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 1664666 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + pending tuples: 0 max pending: 0 zombies: 54111 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 3 + row 40: 3 + row 41: 3 + row 156: 28 + row 157: 28 + row 158: 19 + row 159: 26 + row 160: 26 + row 161: 28 + row 162: 16 + row 163: 16 + row 164: 13 + row 165: 14 + row 166: 14 + row 167: 18 + row 168: 6 + row 172: 6 + row 173: 6 + column: 1 : 27 entries [29:55] + ... + invalid zombie count: 1181 exist but A->nzombies = 54111 +assertion failed: /home/faculty/d/davis/dev3/GraphBLAS/CUDA/GB_cuda_AxB_dot3.cpp line 257 diff --git a/save_errors/o3 b/save_errors/o3 new file mode 100644 index 0000000000..79c18bbd92 --- /dev/null +++ b/save_errors/o3 @@ -0,0 +1,29910 @@ +Test allktruss... GB_cuda_get_device_count: 4, cudaError_t: 0 + +Device: 0: memory: 17071800320 SMs: 56 compute: 6.0 +GB_cuda_init: 0 + +================================== bcsstk13.mtx: + [ GrB_Matrix_build_FP64 (cast J 1 0) (step1: 0.00223532 sec) (step2: 0.020692 sec) (build, 1 threads) (step3: 0.000837564 sec) (step4: 0.000640087 sec) (jit: cpu load) (step5: 0.000984691 sec) (build 32/32 time: 0.025437) (hyper to sparse) (wrapup 64/64 time: 0.00322753) (convert ints 32/32 to 64/64, time: 0.000665821) + 0.0294 sec ] + [ GxB_Vector_diag (jit: cuda load) (sparse to hyper) (sparse to full) + 0.00626 sec ] + [ GrB_Vector_nvals + 1.56e-06 sec ] +graph has 2003 self edges + [ GrB_select (jit: cuda load) + 0.00685 sec ] +now has 0 self edges + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc01fba00 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc01ff900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (jit: cuda load) (hyper to sparse) (jit: cuda load) (hyper to sparse) + 0.0198 sec ] + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024f800 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.0092 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000332 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc024b900 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc014bf00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0105 sec ] + [ GrB_select (hyper to sparse) + 0.00871 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024b900 shallow: 0 size: 16032 + ->i: 0x7effc0008200 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0148000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.13e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (hyper to sparse) + 0.0094 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0393000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00974 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0181 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.00852 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0057600 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00975 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.89e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00931 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00968 sec ] + [ GrB_select (hyper to sparse) + 0.00926 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00982 sec ] + [ GrB_select (hyper to sparse) + 0.00922 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (hyper to sparse) + 0.00928 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.72e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00971 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc08c2f00 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038f000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00919 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00914 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00839 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00912 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00913 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (hyper to sparse) + 0.00836 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00945 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038e800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038db00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072e800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.0087 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0390f00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00821 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00817 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.00894 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000128 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00807 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00914 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0185 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0383700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0a8b200 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00906 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0382d00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (hyper to sparse) + 0.00834 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00779 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00858 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00945 sec ] + [ GrB_select (hyper to sparse) + 0.00778 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (hyper to sparse) + 0.00858 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00894 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081c800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0b61900 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00898 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (hyper to sparse) + 0.00776 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (hyper to sparse) + 0.00847 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000203 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (hyper to sparse) + 0.00769 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0089 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037cc00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037fa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00875 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (hyper to sparse) + 0.00799 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00881 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0152 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00826 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (hyper to sparse) + 0.00746 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000121 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00882 sec ] + [ GrB_select (hyper to sparse) + 0.00817 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00866 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.016 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00854 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00882 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00858 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00725 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00851 sec ] + [ GrB_select (hyper to sparse) + 0.00801 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00888 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00902 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081aa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00895 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00887 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00854 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00858 sec ] + [ GrB_select (hyper to sparse) + 0.00901 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00854 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.41e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00719 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00828 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0139 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00807 sec ] + [ GrB_select (hyper to sparse) + 0.00733 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00813 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.09e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00801 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00799 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00793 sec ] + [ GrB_select (hyper to sparse) + 0.00649 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00792 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00793 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00793 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (hyper to sparse) + 0.0065 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000129 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00801 sec ] + [ GrB_select (hyper to sparse) + 0.00778 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00842 sec ] + [ GrB_select (hyper to sparse) + 0.00703 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00838 sec ] + [ GrB_select (hyper to sparse) + 0.0077 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0398000 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00845 sec ] + [ GrB_select (hyper to sparse) + 0.00739 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00839 sec ] + [ GrB_select (hyper to sparse) + 0.00768 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00835 sec ] + [ GrB_select (hyper to sparse) + 0.00699 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00836 sec ] + [ GrB_select (hyper to sparse) + 0.00762 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.19e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0078 sec ] + [ GrB_select (hyper to sparse) + 0.00622 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00771 sec ] + [ GrB_select (hyper to sparse) + 0.00697 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00766 sec ] + [ GrB_select (hyper to sparse) + 0.0061 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0736900 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00767 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00773 sec ] + [ GrB_select (hyper to sparse) + 0.00683 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.19e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00759 sec ] + [ GrB_select (hyper to sparse) + 0.00603 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.00746 sec ] + [ GrB_select (hyper to sparse) + 0.00665 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00749 sec ] + [ GrB_select (hyper to sparse) + 0.00589 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00738 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00737 sec ] + [ GrB_select (hyper to sparse) + 0.00578 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00734 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00731 sec ] + [ GrB_select (hyper to sparse) + 0.00581 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00741 sec ] + [ GrB_select (hyper to sparse) + 0.00646 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00727 sec ] + [ GrB_select (hyper to sparse) + 0.00561 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00727 sec ] + [ GrB_select (hyper to sparse) + 0.00633 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00726 sec ] + [ GrB_select (hyper to sparse) + 0.0056 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00725 sec ] + [ GrB_select (hyper to sparse) + 0.00634 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00722 sec ] + [ GrB_select (hyper to sparse) + 0.0056 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000122 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6d00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00729 sec ] + [ GrB_select (hyper to sparse) + 0.00549 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.0071 sec ] + [ GrB_select (hyper to sparse) + 0.00653 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.007 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00531 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000115 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00529 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000115 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017e300 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00529 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00688 sec ] + [ GrB_select (hyper to sparse) + 0.00609 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00684 sec ] + [ GrB_select (hyper to sparse) + 0.00531 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00681 sec ] + [ GrB_select (hyper to sparse) + 0.00523 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00678 sec ] + [ GrB_select (hyper to sparse) + 0.00592 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00515 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00667 sec ] + [ GrB_select (hyper to sparse) + 0.00588 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00663 sec ] + [ GrB_select (hyper to sparse) + 0.00514 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00663 sec ] + [ GrB_select (hyper to sparse) + 0.00588 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00665 sec ] + [ GrB_select (hyper to sparse) + 0.00514 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7200 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00672 sec ] + [ GrB_select (hyper to sparse) + 0.00675 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00649 sec ] + [ GrB_select (hyper to sparse) + 0.00493 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00626 sec ] + [ GrB_select (hyper to sparse) + 0.00492 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00619 sec ] + [ GrB_select (hyper to sparse) + 0.00445 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0700a00 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00619 sec ] + [ GrB_select (hyper to sparse) + 0.00442 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06ffe00 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00619 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00615 sec ] + [ GrB_select (hyper to sparse) + 0.00512 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00618 sec ] + [ GrB_select (hyper to sparse) + 0.00437 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.18e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00614 sec ] + [ GrB_select (hyper to sparse) + 0.00399 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0358500 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0058 sec ] + [ GrB_select (hyper to sparse) + 0.00376 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00463 sec ] + [ GrB_select (hyper to sparse) + 0.00406 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc035a700 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00428 sec ] + [ GrB_select (hyper to sparse) + 0.00436 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.52e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0359e00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00416 sec ] + [ GrB_select (hyper to sparse) + 0.0036 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.57e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7500 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00413 sec ] + [ GrB_select (hyper to sparse) + 0.00308 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00344 sec ] + [ GrB_select (hyper to sparse) + 0.00263 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0decb00 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00317 sec ] + [ GrB_select (hyper to sparse) + 0.00291 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00315 sec ] + [ GrB_select (hyper to sparse) + 0.00344 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000105 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7600 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00317 sec ] + [ GrB_select (hyper to sparse) + 0.00337 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0191e00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00304 sec ] + [ GrB_select (hyper to sparse) + 0.00318 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dcf00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00309 sec ] + [ GrB_select (hyper to sparse) + 0.00282 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00346 sec ] + [ GrB_select (hyper to sparse) + 0.00238 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.18e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7700 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00293 sec ] + [ GrB_select + 0.00143 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014f900 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc06ffb00 shallow: 0 size: 1624 + ->p: 0x7effc06ff400 shallow: 0 size: 1632 + ->i: 0x7effc0dec900 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06d9000 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.00189 sec ] + [ GrB_select (jit: cuda load) + 0.00216 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc0dfb500 shallow: 0 size: 336 + ->p: 0x7effc0dfb300 shallow: 0 size: 344 + ->i: 0x7effc00a7400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc00a7c00 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.00141 sec ] + [ GrB_select C is empty, iso 0 + + 0.000492 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 (jit: cpu load) + 0.00027 sec ] + [ GrB_select C is empty, iso 0 + + 0.000423 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 5.09e-06 sec ] +all k-truss: kmax 29 + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0174 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00993 sec ] + [ GrB_select (hyper to sparse) + 0.00865 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) (jit: cpu load) + 0.00205 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000286 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000269 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00989 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0181 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00996 sec ] + [ GrB_select (hyper to sparse) + 0.00861 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00185 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000132 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00988 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0187 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ac900 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00991 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155bb00 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (hyper to sparse) + 0.00852 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155bb00 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00996 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00186 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28935e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52770 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81171 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 990324 shallow: 0 total: 990324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ac500 shallow: 0 size: 324684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81170 entries, memory: 967.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00994 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52622 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81023 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 988548 shallow: 0 total: 988548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155b400 shallow: 0 size: 324092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81022 entries, memory: 965.4 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0185 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52582 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80983 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 988068 shallow: 0 total: 988068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15aa300 shallow: 0 size: 323932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80982 entries, memory: 964.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00999 sec ] + [ GrB_select (hyper to sparse) + 0.00927 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15aa100 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00924 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00922 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00183 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000156 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.22419e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52082 +bucket 2: 28280 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80363 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 980628 shallow: 0 total: 980628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 642904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 321452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80362 entries, memory: 957.6 KB + pending tuples: 0 max pending: 0 zombies: 38 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00983 sec ] + [ GrB_select (wait:A 38 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.18132e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51562 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79827 + vlen: 2003 nvec_nonempty: 1928 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 974196 shallow: 0 total: 974196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 638616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1557800 shallow: 0 size: 319308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79826 entries, memory: 951.4 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0182 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.1573e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51276 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79525 + vlen: 2003 nvec_nonempty: 1913 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 970572 shallow: 0 total: 970572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 636200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a3a00 shallow: 0 size: 318100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79524 entries, memory: 947.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00976 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14461e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51122 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79365 + vlen: 2003 nvec_nonempty: 1903 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 968652 shallow: 0 total: 968652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a2f00 shallow: 0 size: 317460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79364 entries, memory: 945.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00909 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a2300 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00976 sec ] + [ GrB_select (hyper to sparse) + 0.00831 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00828 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1800 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00979 sec ] + [ GrB_select (hyper to sparse) + 0.0083 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1700 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00954 sec ] + [ GrB_select (hyper to sparse) + 0.00827 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1300 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00987 sec ] + [ GrB_select (hyper to sparse) + 0.0083 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1000 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00825 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00188 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00016 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12704e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50958 +bucket 2: 28184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79143 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 965988 shallow: 0 total: 965988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a6600 shallow: 0 size: 316572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79142 entries, memory: 943.3 KB + pending tuples: 0 max pending: 0 zombies: 242 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 25 entries [29:53] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (wait:A 242 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.00653e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49476 +bucket 2: 28126 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77603 + vlen: 2003 nvec_nonempty: 1857 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 947508 shallow: 0 total: 947508 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 620824 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1550600 shallow: 0 size: 310412 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77602 entries, memory: 925.3 KB + pending tuples: 0 max pending: 0 zombies: 28 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 28 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.95361e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48884 +bucket 2: 28032 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76917 + vlen: 2003 nvec_nonempty: 1832 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 939276 shallow: 0 total: 939276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 615336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1597900 shallow: 0 size: 307668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76916 entries, memory: 917.3 KB + pending tuples: 0 max pending: 0 zombies: 24 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (wait:A 24 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93781e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48728 +bucket 2: 27982 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76711 + vlen: 2003 nvec_nonempty: 1807 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 936804 shallow: 0 total: 936804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc154ab00 shallow: 0 size: 306844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76710 entries, memory: 914.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0094 sec ] + [ GrB_select (hyper to sparse) + 0.00817 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93122e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48664 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76625 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 935772 shallow: 0 total: 935772 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613000 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14b4500 shallow: 0 size: 306500 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76624 entries, memory: 913.8 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00956 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00955 sec ] + [ GrB_select (hyper to sparse) + 0.00886 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594b00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00968 sec ] + [ GrB_select (hyper to sparse) + 0.00812 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594b00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00887 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00184 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000179 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.98471e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49590 +bucket 2: 27730 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77321 + vlen: 2003 nvec_nonempty: 1940 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 944124 shallow: 0 total: 944124 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 618568 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1100 shallow: 0 size: 309284 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77320 entries, memory: 922.0 KB + pending tuples: 0 max pending: 0 zombies: 328 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 328 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.74425e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46632 +bucket 2: 27508 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74141 + vlen: 2003 nvec_nonempty: 1823 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 905964 shallow: 0 total: 905964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 593128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1545e00 shallow: 0 size: 296564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74140 entries, memory: 884.7 KB + pending tuples: 0 max pending: 0 zombies: 158 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00912 sec ] + [ GrB_select (wait:A 158 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.69752e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46054 +bucket 2: 27452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73507 + vlen: 2003 nvec_nonempty: 1660 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 898356 shallow: 0 total: 898356 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 588056 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1586900 shallow: 0 size: 294028 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73506 entries, memory: 877.3 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.68227e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45856 +bucket 2: 27442 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73299 + vlen: 2003 nvec_nonempty: 1647 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 895860 shallow: 0 total: 895860 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 586392 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153d700 shallow: 0 size: 293196 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73298 entries, memory: 874.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (hyper to sparse) + 0.00779 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67525e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45770 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73203 + vlen: 2003 nvec_nonempty: 1641 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 894708 shallow: 0 total: 894708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ada00 shallow: 0 size: 292812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73202 entries, memory: 873.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00854 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67306e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73173 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 894348 shallow: 0 total: 894348 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585384 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ad900 shallow: 0 size: 292692 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73172 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00923 sec ] + [ GrB_select (hyper to sparse) + 0.0077 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ad900 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00935 sec ] + [ GrB_select (hyper to sparse) + 0.00777 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000115 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000153 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92388e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48960 +bucket 2: 27568 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76529 + vlen: 2003 nvec_nonempty: 1936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 934620 shallow: 0 total: 934620 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612232 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc159eb00 shallow: 0 size: 306116 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76528 entries, memory: 912.7 KB + pending tuples: 0 max pending: 0 zombies: 350 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (wait:A 350 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0167 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.66168e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45746 +bucket 2: 27270 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73017 + vlen: 2003 nvec_nonempty: 1771 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 892476 shallow: 0 total: 892476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 584136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1542100 shallow: 0 size: 292068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73016 entries, memory: 871.6 KB + pending tuples: 0 max pending: 0 zombies: 132 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (wait:A 132 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.61855e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45198 +bucket 2: 27224 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72423 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 885348 shallow: 0 total: 885348 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 579384 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1581400 shallow: 0 size: 289692 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72422 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0161 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.61016e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45116 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72307 + vlen: 2003 nvec_nonempty: 1614 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 883956 shallow: 0 total: 883956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1539600 shallow: 0 size: 289228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72306 entries, memory: 863.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00843 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14abe00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00921 sec ] + [ GrB_select (hyper to sparse) + 0.00763 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000156 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0182 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.79303e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47624 +bucket 2: 27172 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74797 + vlen: 2003 nvec_nonempty: 1921 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 913836 shallow: 0 total: 913836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 598376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1599a00 shallow: 0 size: 299188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74796 entries, memory: 892.4 KB + pending tuples: 0 max pending: 0 zombies: 142 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00943 sec ] + [ GrB_select (wait:A 142 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.48746e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43754 +bucket 2: 26832 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70587 + vlen: 2003 nvec_nonempty: 1634 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 863316 shallow: 0 total: 863316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 564696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153a500 shallow: 0 size: 282348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70586 entries, memory: 843.1 KB + pending tuples: 0 max pending: 0 zombies: 96 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00894 sec ] + [ GrB_select (wait:A 96 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.42123e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43058 +bucket 2: 26582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69641 + vlen: 2003 nvec_nonempty: 1565 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 851964 shallow: 0 total: 851964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 557128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1574700 shallow: 0 size: 278564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69640 entries, memory: 832.0 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39862e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42842 +bucket 2: 26472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69315 + vlen: 2003 nvec_nonempty: 1553 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 848052 shallow: 0 total: 848052 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 554520 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152e200 shallow: 0 size: 277260 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69314 entries, memory: 828.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1570800 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00946 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0155 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152c600 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00817 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a5500 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00739 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00178 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000153 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.65381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46018 +bucket 2: 26890 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72909 + vlen: 2003 nvec_nonempty: 1883 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 891180 shallow: 0 total: 891180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 583272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594200 shallow: 0 size: 291636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72908 entries, memory: 870.3 KB + pending tuples: 0 max pending: 0 zombies: 198 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 18 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 17 + row 167: 22 + row 173: 8 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 13 + ... + Pending (nil) + + 0.00936 sec ] + [ GrB_select (wait:A 198 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.29674e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41694 +bucket 2: 26132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67827 + vlen: 2003 nvec_nonempty: 1598 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 830196 shallow: 0 total: 830196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 542616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1531300 shallow: 0 size: 271308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67826 entries, memory: 810.7 KB + pending tuples: 0 max pending: 0 zombies: 102 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00899 sec ] + [ GrB_select (wait:A 102 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0146 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.22885e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41008 +bucket 2: 25808 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66817 + vlen: 2003 nvec_nonempty: 1496 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 818076 shallow: 0 total: 818076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 534536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1566b00 shallow: 0 size: 267268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66816 entries, memory: 798.9 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00909 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21872e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40988 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66665 + vlen: 2003 nvec_nonempty: 1492 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 816252 shallow: 0 total: 816252 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533320 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1523600 shallow: 0 size: 266660 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66664 entries, memory: 797.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00925 sec ] + [ GrB_select (hyper to sparse) + 0.00797 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21619e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40970 +bucket 2: 25656 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66627 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815796 shallow: 0 total: 815796 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533016 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0d00 shallow: 0 size: 266508 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66626 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00922 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0c00 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00924 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0c00 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00879 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0b00 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00879 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0a00 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0900 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0800 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0600 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00926 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0500 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0400 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0300 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00882 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0300 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00176 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000161 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.28105e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41598 +bucket 2: 25996 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67595 + vlen: 2003 nvec_nonempty: 1872 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 827412 shallow: 0 total: 827412 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 540760 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1584900 shallow: 0 size: 270380 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67594 entries, memory: 808.0 KB + pending tuples: 0 max pending: 0 zombies: 392 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00899 sec ] + [ GrB_select (wait:A 392 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0141 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.83985e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35884 +bucket 2: 24822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60707 + vlen: 2003 nvec_nonempty: 1405 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 744756 shallow: 0 total: 744756 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 485656 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1518900 shallow: 0 size: 242828 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60706 entries, memory: 727.3 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00875 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0144 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77961e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35146 +bucket 2: 24558 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59705 + vlen: 2003 nvec_nonempty: 1187 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 732732 shallow: 0 total: 732732 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 477640 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1544100 shallow: 0 size: 238820 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59704 entries, memory: 715.6 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00859 sec ] + [ GrB_select (hyper to sparse) + 0.00661 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.76106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34938 +bucket 2: 24454 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59393 + vlen: 2003 nvec_nonempty: 1179 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 728988 shallow: 0 total: 728988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 475144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1541e00 shallow: 0 size: 237572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59392 entries, memory: 711.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (hyper to sparse) + 0.00732 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75147e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34870 +bucket 2: 24360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59231 + vlen: 2003 nvec_nonempty: 1171 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 727044 shallow: 0 total: 727044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1540a00 shallow: 0 size: 236924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59230 entries, memory: 710.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153fd00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00853 sec ] + [ GrB_select (hyper to sparse) + 0.00736 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00211 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.49e-05 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000138 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0103 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0161 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2382e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41186 +bucket 2: 25770 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66957 + vlen: 2003 nvec_nonempty: 1842 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 819756 shallow: 0 total: 819756 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 535656 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1582b00 shallow: 0 size: 267828 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66956 entries, memory: 800.5 KB + pending tuples: 0 max pending: 0 zombies: 366 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00899 sec ] + [ GrB_select (wait:A 366 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0147 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.79826e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35436 +bucket 2: 24580 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60017 + vlen: 2003 nvec_nonempty: 1381 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 736476 shallow: 0 total: 736476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 480136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1515f00 shallow: 0 size: 240068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60016 entries, memory: 719.2 KB + pending tuples: 0 max pending: 0 zombies: 44 + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (wait:A 44 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0143 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72003e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34440 +bucket 2: 24256 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58697 + vlen: 2003 nvec_nonempty: 1180 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 720636 shallow: 0 total: 720636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 469576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153fe00 shallow: 0 size: 234788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58696 entries, memory: 703.7 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00853 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6962e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34220 +bucket 2: 24068 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58289 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 715740 shallow: 0 total: 715740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 466312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 233156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58288 entries, memory: 699.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00846 sec ] + [ GrB_select (hyper to sparse) + 0.00721 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34064 +bucket 2: 23954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58019 + vlen: 2003 nvec_nonempty: 1162 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 712500 shallow: 0 total: 712500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 464152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490000 shallow: 0 size: 232076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58018 entries, memory: 695.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00849 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67231e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34000 +bucket 2: 23876 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57877 + vlen: 2003 nvec_nonempty: 1157 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 710796 shallow: 0 total: 710796 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 463016 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148fb00 shallow: 0 size: 231508 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57876 entries, memory: 694.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00846 sec ] + [ GrB_select (hyper to sparse) + 0.00642 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66838e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33952 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57809 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709980 shallow: 0 total: 709980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f900 shallow: 0 size: 231236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57808 entries, memory: 693.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6655e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33932 +bucket 2: 23826 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57759 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709380 shallow: 0 total: 709380 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462072 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 231036 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57758 entries, memory: 692.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00858 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66388e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33928 +bucket 2: 23802 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57731 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709044 shallow: 0 total: 709044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 230924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57730 entries, memory: 692.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00818 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66261e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33910 +bucket 2: 23798 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57709 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708780 shallow: 0 total: 708780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57708 entries, memory: 692.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00641 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66215e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33922 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57701 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708684 shallow: 0 total: 708684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57700 entries, memory: 692.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00839 sec ] + [ GrB_select (hyper to sparse) + 0.00639 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00842 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00207 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.62e-05 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000142 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0168 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.15672e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40338 +bucket 2: 25388 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 65727 + vlen: 2003 nvec_nonempty: 1822 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 804996 shallow: 0 total: 804996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 525816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc157f000 shallow: 0 size: 262908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 65726 entries, memory: 786.1 KB + pending tuples: 0 max pending: 0 zombies: 362 + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 10 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 10 + row 9: 19 + row 10: 19 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 14 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 10 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00905 sec ] + [ GrB_select (wait:A 362 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0146 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71534e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34398 +bucket 2: 24218 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58617 + vlen: 2003 nvec_nonempty: 1305 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 719676 shallow: 0 total: 719676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 468936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1510d00 shallow: 0 size: 234468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58616 entries, memory: 702.8 KB + pending tuples: 0 max pending: 0 zombies: 20 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00849 sec ] + [ GrB_select (wait:A 20 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6248e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33306 +bucket 2: 23742 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57049 + vlen: 2003 nvec_nonempty: 1155 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 700860 shallow: 0 total: 700860 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 456392 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1538500 shallow: 0 size: 228196 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57048 entries, memory: 684.4 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00836 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.5773e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32858 +bucket 2: 23350 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56209 + vlen: 2003 nvec_nonempty: 1134 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 690780 shallow: 0 total: 690780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 449672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fbe00 shallow: 0 size: 224836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56208 entries, memory: 674.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00814 sec ] + [ GrB_select (hyper to sparse) + 0.00677 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54837e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32788 +bucket 2: 22902 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55691 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 684564 shallow: 0 total: 684564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 445528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b700 shallow: 0 size: 222764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55690 entries, memory: 668.5 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00624 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53904e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32660 +bucket 2: 22862 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55523 + vlen: 2003 nvec_nonempty: 1107 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 682548 shallow: 0 total: 682548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 444184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b200 shallow: 0 size: 222092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55522 entries, memory: 666.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00807 sec ] + [ GrB_select (hyper to sparse) + 0.00695 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00813 sec ] + [ GrB_select (hyper to sparse) + 0.00617 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00197 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.48e-05 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000149 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.05966e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 39348 +bucket 2: 24882 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 64231 + vlen: 2003 nvec_nonempty: 1805 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 787044 shallow: 0 total: 787044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 513848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc157ab00 shallow: 0 size: 256924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 64230 entries, memory: 768.6 KB + pending tuples: 0 max pending: 0 zombies: 414 + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 6 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 9 + row 9: 19 + row 10: 19 + row 11: 18 + row 156: 17 + row 157: 17 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 22 entries [21:42] + row 0: 20 + row 2: 6 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 9 + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (wait:A 414 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.58371e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32850 +bucket 2: 23472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56323 + vlen: 2003 nvec_nonempty: 1164 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 692148 shallow: 0 total: 692148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 450584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1509600 shallow: 0 size: 225292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56322 entries, memory: 675.9 KB + pending tuples: 0 max pending: 0 zombies: 8 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00829 sec ] + [ GrB_select (wait:A 8 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0132 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.48377e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32068 +bucket 2: 22448 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54517 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 670476 shallow: 0 total: 670476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 436136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152c600 shallow: 0 size: 218068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54516 entries, memory: 654.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00685 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.43283e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31686 +bucket 2: 21886 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53573 + vlen: 2003 nvec_nonempty: 1090 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 659148 shallow: 0 total: 659148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 428584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1528000 shallow: 0 size: 214292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53572 entries, memory: 643.7 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00784 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41661e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31630 +bucket 2: 21638 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53269 + vlen: 2003 nvec_nonempty: 1078 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 655500 shallow: 0 total: 655500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 426152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1524100 shallow: 0 size: 213076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53268 entries, memory: 640.1 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00781 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1522c00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 4.47e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00191 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.56e-05 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000131 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.92656e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 37642 +bucket 2: 24478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 62121 + vlen: 2003 nvec_nonempty: 1774 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 761724 shallow: 0 total: 761724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 496968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1574800 shallow: 0 size: 248484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 62120 entries, memory: 743.9 KB + pending tuples: 0 max pending: 0 zombies: 426 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 13 + row 163: 12 + row 165: 8 + row 166: 10 + row 167: 18 + column: 1 : 21 entries [19:39] + row 0: 18 + row 2: 1 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + ... + Pending (nil) + + 0.00872 sec ] + [ GrB_select (wait:A 426 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45722e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31526 +bucket 2: 22500 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54027 + vlen: 2003 nvec_nonempty: 1135 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 664596 shallow: 0 total: 664596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 432216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1500b00 shallow: 0 size: 216108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54026 entries, memory: 649.0 KB + pending tuples: 0 max pending: 0 zombies: 20 + + column: 0 : 14 entries [0:13] + row 1: 13 + row 3: 11 + row 4: 13 + row 5: 13 + row 6: 13 + row 7: 13 + row 9: 13 + row 10: 13 + row 156: 13 + row 157: 13 + row 159: 12 + row 160: 12 + row 161: 13 + row 167: 13 + column: 1 : 14 entries [14:27] + row 0: 13 + row 3: 11 + row 4: 13 + row 5: 13 + row 6: 13 + row 7: 13 + row 9: 13 + row 10: 13 + row 156: 13 + row 157: 13 + row 159: 12 + row 160: 12 + row 161: 13 + row 167: 13 + column: 2 : 0 entries [28:27] + column: 3 : 12 entries [28:39] + row 0: 11 + ... + Pending (nil) + + 0.00811 sec ] + [ GrB_select (wait:A 20 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0127 sec ] + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.33568e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30944 +bucket 2: 20780 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51725 + vlen: 2003 nvec_nonempty: 1075 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 636972 shallow: 0 total: 636972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 413800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc151fc00 shallow: 0 size: 206900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51724 entries, memory: 622.0 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00767 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0117 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.27432e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30334 +bucket 2: 20188 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50523 + vlen: 2003 nvec_nonempty: 1026 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 622548 shallow: 0 total: 622548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 404184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14e6600 shallow: 0 size: 202092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50522 entries, memory: 608.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0076 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25143e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30100 +bucket 2: 19966 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50067 + vlen: 2003 nvec_nonempty: 1012 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 617076 shallow: 0 total: 617076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 400536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480700 shallow: 0 size: 200268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50066 entries, memory: 602.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00763 sec ] + [ GrB_select (hyper to sparse) + 0.00644 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24254e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29946 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49889 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 614940 shallow: 0 total: 614940 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 399112 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480200 shallow: 0 size: 199556 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49888 entries, memory: 600.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00754 sec ] + [ GrB_select (hyper to sparse) + 0.00573 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0075 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00754 sec ] + [ GrB_select (hyper to sparse) + 0.00571 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00188 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.48e-05 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000136 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.82328e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36250 +bucket 2: 24182 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60433 + vlen: 2003 nvec_nonempty: 1754 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 741468 shallow: 0 total: 741468 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 483464 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc156f900 shallow: 0 size: 241732 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60432 entries, memory: 724.1 KB + pending tuples: 0 max pending: 0 zombies: 428 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 13 + row 163: 12 + row 165: 8 + row 166: 10 + row 167: 18 + column: 1 : 21 entries [19:39] + row 0: 18 + row 2: 1 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (wait:A 428 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.33754e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 21174 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51761 + vlen: 2003 nvec_nonempty: 1108 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 637404 shallow: 0 total: 637404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 414088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14c6700 shallow: 0 size: 207044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51760 entries, memory: 622.5 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 14 entries [0:13] + row 1: 13 + row 3: 10 + row 4: 11 + row 5: 6 + row 6: 13 + row 7: 13 + row 9: 12 + row 10: 12 + row 156: 12 + row 157: 12 + row 159: 10 + row 160: 10 + row 161: 13 + row 167: 13 + column: 1 : 14 entries [14:27] + row 0: 13 + row 3: 10 + row 4: 11 + row 5: 6 + row 6: 13 + row 7: 13 + row 9: 12 + row 10: 12 + row 156: 12 + row 157: 12 + row 159: 10 + row 160: 10 + row 161: 13 + row 167: 13 + column: 2 : 0 entries [28:27] + column: 3 : 11 entries [28:38] + row 0: 10 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0123 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.19303e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29410 +bucket 2: 19474 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48885 + vlen: 2003 nvec_nonempty: 999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 602892 shallow: 0 total: 602892 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 391080 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147e200 shallow: 0 size: 195540 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48884 entries, memory: 588.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00749 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0116 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13995e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28554 +bucket 2: 19230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47785 + vlen: 2003 nvec_nonempty: 954 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 589692 shallow: 0 total: 589692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 382280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150a300 shallow: 0 size: 191140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47784 entries, memory: 575.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00743 sec ] + [ GrB_select (hyper to sparse) + 0.00552 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13004e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28380 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47577 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 587196 shallow: 0 total: 587196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 380616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1507e00 shallow: 0 size: 190308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47576 entries, memory: 573.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00743 sec ] + [ GrB_select (hyper to sparse) + 0.00626 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1506e00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00744 sec ] + [ GrB_select (hyper to sparse) + 0.00627 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00182 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.5e-05 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000251 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55906e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32398 +bucket 2: 23484 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55883 + vlen: 2003 nvec_nonempty: 1562 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 686868 shallow: 0 total: 686868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 447064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1562400 shallow: 0 size: 223532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55882 entries, memory: 670.8 KB + pending tuples: 0 max pending: 0 zombies: 290 + + column: 0 : 18 entries [0:17] + row 1: 16 + row 3: 13 + row 4: 14 + row 5: 12 + row 6: 16 + row 7: 16 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + row 157: 15 + row 159: 12 + row 160: 12 + row 161: 17 + row 162: 10 + row 163: 9 + row 166: 6 + row 167: 17 + column: 1 : 20 entries [18:37] + row 0: 16 + row 2: 1 + row 3: 13 + row 4: 13 + row 5: 11 + row 6: 16 + row 7: 16 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + ... + Pending (nil) + + 0.00834 sec ] + [ GrB_select (wait:A 290 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0127 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.17494e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28792 +bucket 2: 19720 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48513 + vlen: 2003 nvec_nonempty: 1059 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 598428 shallow: 0 total: 598428 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 388104 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14baa00 shallow: 0 size: 194052 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48512 entries, memory: 584.4 KB + pending tuples: 0 max pending: 0 zombies: 12 + + column: 0 : 2 entries [0:1] + row 161: 1 + row 167: 1 + column: 1 : 2 entries [2:3] + row 161: 1 + row 167: 1 + column: 2 : 0 entries [4:3] + column: 3 : 0 entries [4:3] + column: 4 : 0 entries [4:3] + column: 5 : 0 entries [4:3] + column: 6 : 18 entries [4:21] + row 7: 17 + row 9: 16 + row 10: 16 + row 11: 16 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 13 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [22:39] + row 6: 17 + row 9: 16 + row 10: 16 + row 11: 16 + row 12: 17 + row 13: 17 + row 15: 17 + ... + Pending (nil) + + 0.00763 sec ] + [ GrB_select (wait:A 12 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0114 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02523e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26864 +bucket 2: 18452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45317 + vlen: 2003 nvec_nonempty: 880 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 560076 shallow: 0 total: 560076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 362536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1477300 shallow: 0 size: 181268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45316 entries, memory: 546.9 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 13 entries [0:12] + row 7: 12 + row 12: 12 + row 13: 12 + row 15: 12 + row 16: 12 + row 17: 12 + row 150: 12 + row 151: 12 + row 155: 12 + row 156: 12 + row 159: 12 + row 160: 12 + row 161: 12 + column: 7 : 13 entries [13:25] + row 6: 12 + row 12: 12 + row 13: 12 + row 15: 12 + row 16: 12 + row 17: 12 + row 150: 12 + row 151: 12 + row 155: 12 + row 156: 12 + row 159: 12 + row 160: 12 + row 161: 12 + column: 8 : 0 entries [26:25] + column: 9 : 0 entries [26:25] + column: 10 : 0 entries [26:25] + ... + Pending (nil) + + 0.00718 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0114 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00291e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26452 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44821 + vlen: 2003 nvec_nonempty: 835 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 554124 shallow: 0 total: 554124 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358568 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fa900 shallow: 0 size: 179284 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44820 entries, memory: 541.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (hyper to sparse) + 0.0053 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14f9800 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00709 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00167 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.28e-05 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000139 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.49359e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 23110 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54697 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 672636 shallow: 0 total: 672636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 437576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155ec00 shallow: 0 size: 218788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54696 entries, memory: 656.9 KB + pending tuples: 0 max pending: 0 zombies: 274 + + column: 0 : 18 entries [0:17] + row 1: 15 + row 3: 12 + row 4: 14 + row 5: 12 + row 6: 15 + row 7: 15 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + row 157: 15 + row 159: 12 + row 160: 12 + row 161: 17 + row 162: 7 + row 163: 8 + row 166: 6 + row 167: 17 + column: 1 : 17 entries [18:34] + row 0: 15 + row 2: 1 + row 3: 12 + row 4: 12 + row 5: 10 + row 6: 15 + row 7: 15 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + ... + Pending (nil) + + 0.00824 sec ] + [ GrB_select (wait:A 274 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0125 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.0822e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28032 +bucket 2: 18526 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46559 + vlen: 2003 nvec_nonempty: 1023 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 574980 shallow: 0 total: 574980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 372472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14e3e00 shallow: 0 size: 186236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46558 entries, memory: 561.5 KB + pending tuples: 0 max pending: 0 zombies: 28 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 15 entries [0:14] + row 7: 14 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 9 + row 13: 9 + row 15: 9 + row 16: 9 + row 150: 11 + row 155: 11 + row 156: 10 + row 157: 10 + row 159: 14 + row 160: 14 + row 161: 14 + column: 7 : 15 entries [15:29] + row 6: 14 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 9 + row 13: 9 + row 15: 9 + row 16: 9 + row 150: 11 + row 155: 11 + row 156: 10 + row 157: 10 + row 159: 14 + row 160: 14 + ... + Pending (nil) + + 0.00741 sec ] + [ GrB_select (wait:A 28 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0111 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:990598 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26420 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44545 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550812 shallow: 0 total: 550812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 356360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fc100 shallow: 0 size: 178180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44544 entries, memory: 537.9 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0112 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cc800 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (hyper to sparse) + 0.006 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00169 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.26e-05 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000129 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0151 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42226e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 22788 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53375 + vlen: 2003 nvec_nonempty: 1509 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 656772 shallow: 0 total: 656772 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 427000 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486e00 shallow: 0 size: 213500 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53374 entries, memory: 641.4 KB + pending tuples: 0 max pending: 0 zombies: 244 + + column: 0 : 13 entries [0:12] + row 1: 8 + row 3: 6 + row 4: 10 + row 5: 4 + row 6: 8 + row 7: 8 + row 9: 8 + row 10: 8 + row 156: 8 + row 157: 8 + row 162: 5 + row 166: 3 + row 167: 6 + column: 1 : 10 entries [13:22] + row 0: 8 + row 3: 6 + row 4: 6 + row 6: 8 + row 7: 8 + row 9: 8 + row 10: 8 + row 156: 7 + row 157: 7 + row 163: 2 + column: 2 : 0 entries [23:22] + column: 3 : 7 entries [23:29] + row 0: 6 + row 1: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (wait:A 244 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0119 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02089e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26828 +bucket 2: 18392 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45221 + vlen: 2003 nvec_nonempty: 954 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 558924 shallow: 0 total: 558924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 361768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150b000 shallow: 0 size: 180884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45220 entries, memory: 545.8 KB + pending tuples: 0 max pending: 0 zombies: 26 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 3 entries [0:2] + row 7: 2 + row 9: 2 + row 10: 2 + column: 7 : 3 entries [3:5] + row 6: 2 + row 9: 2 + row 10: 2 + column: 8 : 0 entries [6:5] + column: 9 : 3 entries [6:8] + row 6: 2 + row 7: 2 + row 10: 2 + column: 10 : 3 entries [9:11] + row 6: 2 + row 7: 2 + row 9: 2 + ... + Pending (nil) + + 0.00712 sec ] + [ GrB_select (wait:A 26 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.011 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cdd00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00708 sec ] + [ GrB_select (hyper to sparse) + 0.00601 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00174 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.39e-05 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.377e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30114 +bucket 2: 22404 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52519 + vlen: 2003 nvec_nonempty: 1497 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 646500 shallow: 0 total: 646500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 420152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1485400 shallow: 0 size: 210076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52518 entries, memory: 631.3 KB + pending tuples: 0 max pending: 0 zombies: 234 + + column: 0 : 11 entries [0:10] + row 1: 6 + row 3: 6 + row 4: 10 + row 5: 4 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + row 162: 3 + row 166: 3 + row 167: 4 + column: 1 : 8 entries [11:18] + row 0: 6 + row 3: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + row 163: zombie + column: 2 : 0 entries [19:18] + column: 3 : 7 entries [19:25] + row 0: 6 + row 1: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + column: 4 : 11 entries [26:36] + row 0: 10 + row 1: 6 + row 3: 6 + ... + Pending (nil) + + 0.00781 sec ] + [ GrB_select (wait:A 234 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0121 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00479e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26616 +bucket 2: 18246 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44863 + vlen: 2003 nvec_nonempty: 917 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 554628 shallow: 0 total: 554628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1508300 shallow: 0 size: 179452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44862 entries, memory: 541.6 KB + pending tuples: 0 max pending: 0 zombies: 24 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 1 entries [0:0] + row 7: zombie + column: 7 : 1 entries [1:1] + row 6: zombie + column: 8 : 0 entries [2:1] + column: 9 : 0 entries [2:1] + column: 10 : 0 entries [2:1] + ... + Pending (nil) + + 0.00708 sec ] + [ GrB_select (wait:A 24 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0113 sec ] + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cd100 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (hyper to sparse) + 0.00602 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00172 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.25e-05 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000126 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.31715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29374 +bucket 2: 21990 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51365 + vlen: 2003 nvec_nonempty: 1453 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 632652 shallow: 0 total: 632652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 410920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1483000 shallow: 0 size: 205460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51364 entries, memory: 617.8 KB + pending tuples: 0 max pending: 0 zombies: 200 + + column: 0 : 8 entries [0:7] + row 1: 2 + row 4: 3 + row 5: 2 + row 6: 2 + row 7: 2 + row 162: 2 + row 166: 1 + row 167: 4 + column: 1 : 5 entries [8:12] + row 0: 2 + row 3: zombie + row 6: 2 + row 7: 2 + row 163: zombie + column: 2 : 0 entries [13:12] + column: 3 : 1 entries [13:13] + row 1: zombie + column: 4 : 4 entries [14:17] + row 0: 3 + row 5: 2 + row 162: 2 + row 167: 3 + column: 5 : 3 entries [18:20] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 16 entries [21:36] + row 0: 2 + row 1: 2 + row 7: 15 + row 8: 3 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 4 + ... + Pending (nil) + + 0.00771 sec ] + [ GrB_select (wait:A 200 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.012 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:977035 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26282 +bucket 2: 17956 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44239 + vlen: 2003 nvec_nonempty: 869 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 547140 shallow: 0 total: 547140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 353912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1504400 shallow: 0 size: 176956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44238 entries, memory: 534.3 KB + pending tuples: 0 max pending: 0 zombies: 8 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00701 sec ] + [ GrB_select (wait:A 8 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0103 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:938723 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25688 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43363 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 536628 shallow: 0 total: 536628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 346904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14c9d00 shallow: 0 size: 173452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43362 entries, memory: 524.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00586 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472b00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00691 sec ] + [ GrB_select (hyper to sparse) + 0.00577 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472200 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00682 sec ] + [ GrB_select (hyper to sparse) + 0.0056 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472200 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00683 sec ] + [ GrB_select (hyper to sparse) + 0.0051 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472100 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00507 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00163 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.11e-05 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00012 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23111e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29440 +bucket 2: 20218 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49659 + vlen: 2003 nvec_nonempty: 1421 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612180 shallow: 0 total: 612180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fa00 shallow: 0 size: 198636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49658 entries, memory: 597.8 KB + pending tuples: 0 max pending: 0 zombies: 180 + + column: 0 : 8 entries [0:7] + row 1: 2 + row 4: 3 + row 5: 2 + row 6: 2 + row 7: 2 + row 162: 2 + row 166: 1 + row 167: 4 + column: 1 : 4 entries [8:11] + row 0: 2 + row 6: 2 + row 7: 2 + row 163: zombie + column: 2 : 0 entries [12:11] + column: 3 : 0 entries [12:11] + column: 4 : 4 entries [12:15] + row 0: 3 + row 5: 2 + row 162: 2 + row 167: 3 + column: 5 : 3 entries [16:18] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 15 entries [19:33] + row 0: 2 + row 1: 2 + row 7: 14 + row 8: 1 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 3 + row 13: 3 + row 150: 8 + ... + Pending (nil) + + 0.00764 sec ] + [ GrB_select (wait:A 180 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0114 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:751128 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24834 +bucket 2: 13954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 38789 + vlen: 2003 nvec_nonempty: 831 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 481740 shallow: 0 total: 481740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 310312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14f1100 shallow: 0 size: 155156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 38788 entries, memory: 470.4 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00656 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0104 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:636932 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23340 +bucket 2: 12378 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35719 + vlen: 2003 nvec_nonempty: 730 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 444900 shallow: 0 total: 444900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 285752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14b0300 shallow: 0 size: 142876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35718 entries, memory: 434.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00638 sec ] + [ GrB_select (hyper to sparse) + 0.0053 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:620984 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23360 +bucket 2: 11908 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35269 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 439500 shallow: 0 total: 439500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 282152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463900 shallow: 0 size: 141076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35268 entries, memory: 429.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00636 sec ] + [ GrB_select (hyper to sparse) + 0.00444 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612911 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23636 +bucket 2: 11402 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35039 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436740 shallow: 0 total: 436740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 280312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463100 shallow: 0 size: 140156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35038 entries, memory: 426.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00629 sec ] + [ GrB_select (hyper to sparse) + 0.00518 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611653 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23678 +bucket 2: 11324 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35003 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436308 shallow: 0 total: 436308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 280024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463000 shallow: 0 size: 140012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35002 entries, memory: 426.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00647 sec ] + [ GrB_select (hyper to sparse) + 0.00438 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463000 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00627 sec ] + [ GrB_select (hyper to sparse) + 0.00438 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1462f00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00631 sec ] + [ GrB_select (hyper to sparse) + 0.0044 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00148 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.87e-05 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000128 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:863070 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29724 +bucket 2: 11854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 41579 + vlen: 2003 nvec_nonempty: 1368 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 515220 shallow: 0 total: 515220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 332632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146fe00 shallow: 0 size: 166316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 41578 entries, memory: 503.1 KB + pending tuples: 0 max pending: 0 zombies: 128 + + column: 0 : 4 entries [0:3] + row 4: 2 + row 5: 2 + row 162: 1 + row 167: 3 + column: 1 : 0 entries [4:3] + column: 2 : 0 entries [4:3] + column: 3 : 0 entries [4:3] + column: 4 : 3 entries [4:6] + row 0: 2 + row 5: 2 + row 167: 2 + column: 5 : 3 entries [7:9] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 9 entries [10:18] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [19:27] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [28:27] + column: 9 : 9 entries [28:36] + row 6: 8 + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (wait:A 128 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00969 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:367034 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26862 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 27115 + vlen: 2003 nvec_nonempty: 713 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 341652 shallow: 0 total: 341652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 216920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1453a00 shallow: 0 size: 108460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 27114 entries, memory: 333.6 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00546 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484b00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00423 sec ] + [ GrB_select (hyper to sparse) + 0.0043 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00128 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000197 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.57e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0142 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825705 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29092 +bucket 2: 11576 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40669 + vlen: 2003 nvec_nonempty: 1347 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 504300 shallow: 0 total: 504300 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 325352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146e100 shallow: 0 size: 162676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40668 entries, memory: 492.5 KB + pending tuples: 0 max pending: 0 zombies: 98 + + column: 0 : 3 entries [0:2] + row 4: 2 + row 5: 2 + row 167: 2 + column: 1 : 0 entries [3:2] + column: 2 : 0 entries [3:2] + column: 3 : 0 entries [3:2] + column: 4 : 3 entries [3:5] + row 0: 2 + row 5: 2 + row 167: 2 + column: 5 : 3 entries [6:8] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 9 entries [9:17] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [18:26] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [27:26] + column: 9 : 9 entries [27:35] + row 6: 8 + row 7: 8 + ... + Pending (nil) + + 0.00685 sec ] + [ GrB_select (wait:A 98 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00934 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:366168 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26830 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 27083 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 341268 shallow: 0 total: 341268 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 216664 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14bd500 shallow: 0 size: 108332 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 27082 entries, memory: 333.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00549 sec ] + [ GrB_select (hyper to sparse) + 0.00356 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc144fb00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00422 sec ] + [ GrB_select (hyper to sparse) + 0.00429 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00123 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.61e-05 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.56e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:625854 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28470 +bucket 2: 6936 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35407 + vlen: 2003 nvec_nonempty: 1290 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 441156 shallow: 0 total: 441156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 283256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463d00 shallow: 0 size: 141628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35406 entries, memory: 430.8 KB + pending tuples: 0 max pending: 0 zombies: 86 + + column: 0 : 1 entries [0:0] + row 4: zombie + column: 1 : 0 entries [1:0] + column: 2 : 0 entries [1:0] + column: 3 : 0 entries [1:0] + column: 4 : 1 entries [1:1] + row 0: zombie + column: 5 : 0 entries [2:1] + column: 6 : 9 entries [2:10] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [11:19] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [20:19] + column: 9 : 9 entries [20:28] + row 6: 8 + row 7: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 10 : 9 entries [29:37] + ... + Pending (nil) + + 0.00648 sec ] + [ GrB_select (wait:A 86 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00822 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:152477 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17224 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17477 + vlen: 2003 nvec_nonempty: 601 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 225996 shallow: 0 total: 225996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a9e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 139816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17476 entries, memory: 220.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0046 sec ] + [ GrB_select (hyper to sparse) + 0.00262 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106712 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14620 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14621 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 191724 shallow: 0 total: 191724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 116968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 58484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14620 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00323 sec ] + [ GrB_select (hyper to sparse) + 0.00332 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00389 sec ] + [ GrB_select (hyper to sparse) + 0.00254 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000952 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.96e-05 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.69e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:585584 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28216 +bucket 2: 6032 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34249 + vlen: 2003 nvec_nonempty: 1133 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 427260 shallow: 0 total: 427260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 273992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1461900 shallow: 0 size: 136996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34248 entries, memory: 417.2 KB + pending tuples: 0 max pending: 0 zombies: 68 + + column: 0 : 1 entries [0:0] + row 4: zombie + column: 1 : 0 entries [1:0] + column: 2 : 0 entries [1:0] + column: 3 : 0 entries [1:0] + column: 4 : 1 entries [1:1] + row 0: zombie + column: 5 : 0 entries [2:1] + column: 6 : 9 entries [2:10] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [11:19] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [20:19] + column: 9 : 9 entries [20:28] + row 6: 8 + row 7: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 10 : 9 entries [29:37] + ... + Pending (nil) + + 0.00637 sec ] + [ GrB_select (wait:A 68 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00789 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:137143 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 16418 +bucket 2: 156 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 16575 + vlen: 2003 nvec_nonempty: 597 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 215172 shallow: 0 total: 215172 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 132600 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 66300 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 16574 entries, memory: 210.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00407 sec ] + [ GrB_select (hyper to sparse) + 0.00241 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:78332.8 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12526 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12527 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 166596 shallow: 0 total: 166596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 100216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 50108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12526 entries, memory: 162.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00297 sec ] + [ GrB_select (hyper to sparse) + 0.00279 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc141ea00 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00301 sec ] + [ GrB_select (hyper to sparse) + 0.00236 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000907 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.26e-05 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.14e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0133 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418713 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28960 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28961 + vlen: 2003 nvec_nonempty: 1107 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 363804 shallow: 0 total: 363804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 231688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1457400 shallow: 0 size: 115844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28960 entries, memory: 355.3 KB + pending tuples: 0 max pending: 0 zombies: 64 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 9 entries [0:8] + row 7: 8 + row 9: 6 + row 10: 6 + row 11: 4 + row 156: 7 + row 157: 7 + row 159: 5 + row 160: 5 + row 161: 8 + column: 7 : 9 entries [9:17] + row 6: 8 + row 9: 6 + row 10: 6 + row 11: 4 + row 156: 7 + row 157: 7 + row 159: 5 + row 160: 5 + row 161: 8 + column: 8 : 0 entries [18:17] + column: 9 : 7 entries [18:24] + row 6: 6 + row 7: 6 + row 10: 6 + row 11: 4 + row 156: 5 + row 157: 5 + row 161: 6 + column: 10 : 7 entries [25:31] + row 6: 6 + row 7: 6 + row 9: 6 + row 11: 4 + ... + Pending (nil) + + 0.00457 sec ] + [ GrB_select (wait:A 64 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0068 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:14601.3 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 5408 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 5409 + vlen: 2003 nvec_nonempty: 543 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 81180 shallow: 0 total: 81180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df7200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 43272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06a3600 shallow: 0 size: 21636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 5408 entries, memory: 79.3 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00164 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) C is empty, iso 0 + + 0.00289 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 + 0.000158 sec ] + [ GrB_select C is empty, iso 0 + + 0.000376 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=H.*H) (jit: cpu load) + 0.000559 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 5.92e-06 sec ] + [ GrB_reduce work:0 gpus:0 + 4.57e-06 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0187 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df4700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00862 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000108 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00944 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7900 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0698c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00859 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00977 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc069cb00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00971 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (hyper to sparse) + 0.00852 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc069cb00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00978 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.39e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00857 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00978 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00972 sec ] + [ GrB_select (hyper to sparse) + 0.00845 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00976 sec ] + [ GrB_select (hyper to sparse) + 0.00842 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00847 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.59e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1556e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1968700 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00956 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1559a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00891 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00836 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00912 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.0084 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00912 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00911 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00906 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00977 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1551c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1a50300 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00943 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc154f500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00937 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c6200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1a50300 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1551900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00956 sec ] + [ GrB_select (hyper to sparse) + 0.00891 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (hyper to sparse) + 0.00816 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00892 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00816 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0094 sec ] + [ GrB_select (hyper to sparse) + 0.00878 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00917 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1b30a00 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00917 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0161 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00907 sec ] + [ GrB_select (hyper to sparse) + 0.00858 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00904 sec ] + [ GrB_select (hyper to sparse) + 0.00786 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00903 sec ] + [ GrB_select (hyper to sparse) + 0.00858 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00905 sec ] + [ GrB_select (hyper to sparse) + 0.00784 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00908 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00899 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1c07100 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00901 sec ] + [ GrB_select (hyper to sparse) + 0.00782 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (hyper to sparse) + 0.00853 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00901 sec ] + [ GrB_select (hyper to sparse) + 0.0077 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00891 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0167 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0089 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00882 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00751 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (hyper to sparse) + 0.0075 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00877 sec ] + [ GrB_select (hyper to sparse) + 0.00828 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00013 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00746 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (hyper to sparse) + 0.00816 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00861 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0154 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00787 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00876 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00864 sec ] + [ GrB_select (hyper to sparse) + 0.00732 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (hyper to sparse) + 0.00805 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00735 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0300 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00886 sec ] + [ GrB_select (hyper to sparse) + 0.00802 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0300 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (hyper to sparse) + 0.00729 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00888 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00884 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00884 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00861 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.4e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00828 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150af00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00811 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492600 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00807 sec ] + [ GrB_select (hyper to sparse) + 0.00744 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492400 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00806 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.94e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492400 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00807 sec ] + [ GrB_select (hyper to sparse) + 0.00737 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1491a00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00813 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1491000 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490900 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00803 sec ] + [ GrB_select (hyper to sparse) + 0.00655 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490300 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.008 sec ] + [ GrB_select (hyper to sparse) + 0.0073 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148fd00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00798 sec ] + [ GrB_select (hyper to sparse) + 0.00648 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f900 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f800 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00648 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (hyper to sparse) + 0.00722 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000124 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.0064 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148db00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0079 sec ] + [ GrB_select (hyper to sparse) + 0.00807 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148c600 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00779 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148ba00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0078 sec ] + [ GrB_select (hyper to sparse) + 0.00625 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b400 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00781 sec ] + [ GrB_select (hyper to sparse) + 0.00698 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00785 sec ] + [ GrB_select (hyper to sparse) + 0.00623 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00698 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0079 sec ] + [ GrB_select (hyper to sparse) + 0.00624 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1489d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00776 sec ] + [ GrB_select (hyper to sparse) + 0.00689 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1488300 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00771 sec ] + [ GrB_select (hyper to sparse) + 0.00612 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1487000 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00762 sec ] + [ GrB_select (hyper to sparse) + 0.00683 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486a00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00763 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.12e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486a00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00756 sec ] + [ GrB_select (hyper to sparse) + 0.00672 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484600 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.00752 sec ] + [ GrB_select (hyper to sparse) + 0.00655 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00748 sec ] + [ GrB_select (hyper to sparse) + 0.00587 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480a00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0074 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480200 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00737 sec ] + [ GrB_select (hyper to sparse) + 0.00576 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00736 sec ] + [ GrB_select (hyper to sparse) + 0.00654 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00732 sec ] + [ GrB_select (hyper to sparse) + 0.00576 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdc00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00738 sec ] + [ GrB_select (hyper to sparse) + 0.00574 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147de00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00728 sec ] + [ GrB_select (hyper to sparse) + 0.00603 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147be00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00726 sec ] + [ GrB_select (hyper to sparse) + 0.0056 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147bb00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00723 sec ] + [ GrB_select (hyper to sparse) + 0.00634 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147ba00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.0056 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147b600 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0072 sec ] + [ GrB_select (hyper to sparse) + 0.00635 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdd00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147b600 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00726 sec ] + [ GrB_select (hyper to sparse) + 0.00551 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479a00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.00715 sec ] + [ GrB_select (hyper to sparse) + 0.0054 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1477200 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.00706 sec ] + [ GrB_select (hyper to sparse) + 0.00611 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476300 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00533 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.36e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdc00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc141ea00 shallow: 0 size: 16032 + ->i: 0x7effc1422900 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479700 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00608 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc141ea00 shallow: 0 size: 16032 + ->i: 0x7effc1422900 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479700 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00701 sec ] + [ GrB_select (hyper to sparse) + 0.0053 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1422900 shallow: 0 size: 16032 + ->i: 0x7effc1426800 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147d600 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00696 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481500 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00599 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481400 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00684 sec ] + [ GrB_select (hyper to sparse) + 0.00526 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481400 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00691 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484c00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00683 sec ] + [ GrB_select (hyper to sparse) + 0.00524 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1483d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00677 sec ] + [ GrB_select (hyper to sparse) + 0.00595 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1482b00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0067 sec ] + [ GrB_select (hyper to sparse) + 0.00519 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1482700 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0067 sec ] + [ GrB_select (hyper to sparse) + 0.00527 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00589 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00589 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.19e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556a00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0067 sec ] + [ GrB_select (hyper to sparse) + 0.00575 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142e600 shallow: 0 size: 16032 + ->i: 0x7effc1432500 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00655 sec ] + [ GrB_select (hyper to sparse) + 0.00469 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00626 sec ] + [ GrB_select (hyper to sparse) + 0.00452 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1508400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146fd00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00615 sec ] + [ GrB_select (hyper to sparse) + 0.00487 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fe400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146f400 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00616 sec ] + [ GrB_select (hyper to sparse) + 0.00445 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fcb00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146ee00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00614 sec ] + [ GrB_select (hyper to sparse) + 0.00501 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fbb00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ffa00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0061 sec ] + [ GrB_select (hyper to sparse) + 0.00479 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb700 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ff600 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00614 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.57e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb500 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ff400 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00614 sec ] + [ GrB_select (hyper to sparse) + 0.00398 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ff400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00586 sec ] + [ GrB_select (hyper to sparse) + 0.00377 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ff400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00457 sec ] + [ GrB_select (hyper to sparse) + 0.00366 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ab500 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00414 sec ] + [ GrB_select (hyper to sparse) + 0.0036 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000108 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14a9100 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00412 sec ] + [ GrB_select (hyper to sparse) + 0.00435 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000103 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556d00 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ad000 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00418 sec ] + [ GrB_select (hyper to sparse) + 0.00312 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb500 shallow: 0 size: 16032 + ->i: 0x7effc14ff400 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00345 sec ] + [ GrB_select (hyper to sparse) + 0.00266 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc148c700 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ad000 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00325 sec ] + [ GrB_select (hyper to sparse) + 0.00261 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1480100 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0035 sec ] + [ GrB_select (hyper to sparse) + 0.00256 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.68e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556e00 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00316 sec ] + [ GrB_select (hyper to sparse) + 0.00255 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1483e00 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1515500 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00304 sec ] + [ GrB_select (hyper to sparse) + 0.00289 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1454a00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00304 sec ] + [ GrB_select (hyper to sparse) + 0.00316 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00293 sec ] + [ GrB_select (hyper to sparse) + 0.00248 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.58e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556f00 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00298 sec ] + [ GrB_select + 0.00143 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014fa00 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc030b600 shallow: 0 size: 1624 + ->p: 0x7effc06b0700 shallow: 0 size: 1632 + ->i: 0x7effc142a700 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147ff00 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.00166 sec ] + [ GrB_select + 0.00101 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06b0100 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc1557200 shallow: 0 size: 336 + ->p: 0x7effc1557000 shallow: 0 size: 344 + ->i: 0x7effc06ff400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc030b600 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.00151 sec ] + [ GrB_select C is empty, iso 0 + + 0.000446 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 + 0.000172 sec ] + [ GrB_select C is empty, iso 0 + + 0.000389 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:0 gpus:0 + 4.63e-06 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0019 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00162 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00165 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00158 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00166 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00161 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00156 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.82e-05 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00154 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.52e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00152 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.97e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00149 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00183 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.79e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0018 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00171 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.65e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00169 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.74e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00159 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.94e-05 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00151 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.67e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00148 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.39e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 1.49e-07 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00147 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.84e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0015 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.54e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0014 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.52e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00143 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.53e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00127 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.73e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000998 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.46e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000986 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.43e-05 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000752 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.82e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000723 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.3e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=H.*H) + 0.000248 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.28e-06 sec ] +[ OK ] +SUCCESS: All unit tests have passed. diff --git a/save_errors/o4 b/save_errors/o4 new file mode 100644 index 0000000000..b93e784cfd --- /dev/null +++ b/save_errors/o4 @@ -0,0 +1,29910 @@ +Test allktruss... GB_cuda_get_device_count: 4, cudaError_t: 0 + +Device: 0: memory: 17071800320 SMs: 56 compute: 6.0 +GB_cuda_init: 0 + +================================== bcsstk13.mtx: + [ GrB_Matrix_build_FP64 (cast J 1 0) (step1: 0.00222955 sec) (step2: 0.0206194 sec) (build, 1 threads) (step3: 0.000837199 sec) (step4: 0.000639021 sec) (jit: cpu load) (step5: 0.0010035 sec) (build 32/32 time: 0.025375) (hyper to sparse) (wrapup 64/64 time: 0.00322786) (convert ints 32/32 to 64/64, time: 0.000661053) + 0.0293 sec ] + [ GxB_Vector_diag (jit: cuda load) (sparse to hyper) (sparse to full) + 0.00585 sec ] + [ GrB_Vector_nvals + 1.53e-06 sec ] +graph has 2003 self edges + [ GrB_select (jit: cuda load) + 0.00709 sec ] +now has 0 self edges + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc01fba00 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc01ff900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (jit: cuda load) (hyper to sparse) (jit: cuda load) (hyper to sparse) + 0.0209 sec ] + [ GrB_Matrix_nvals + 2.09e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024f800 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.00911 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000327 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc024b900 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc014bf00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0105 sec ] + [ GrB_select (hyper to sparse) + 0.00867 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024b900 shallow: 0 size: 16032 + ->i: 0x7effc0008200 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0148000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00855 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.16e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (hyper to sparse) + 0.00934 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0393000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00971 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.018 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0057600 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00981 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.48e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (hyper to sparse) + 0.0093 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00959 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00926 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00985 sec ] + [ GrB_select (hyper to sparse) + 0.00922 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00961 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (hyper to sparse) + 0.00923 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000101 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00968 sec ] + [ GrB_select (hyper to sparse) + 0.00864 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00954 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc08c2f00 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038f000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00946 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00954 sec ] + [ GrB_select (hyper to sparse) + 0.00911 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00946 sec ] + [ GrB_select (hyper to sparse) + 0.00915 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00839 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00912 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00946 sec ] + [ GrB_select (hyper to sparse) + 0.00914 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00834 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00941 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0174 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038e800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00938 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038db00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072e800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc09aab00 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00935 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00924 sec ] + [ GrB_select (hyper to sparse) + 0.00868 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0390f00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00895 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00818 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00926 sec ] + [ GrB_select (hyper to sparse) + 0.00999 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00983 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00912 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0383700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0a8b200 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00904 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0382d00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00913 sec ] + [ GrB_select (hyper to sparse) + 0.00835 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.0078 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00856 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824700 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00936 sec ] + [ GrB_select (hyper to sparse) + 0.0078 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00935 sec ] + [ GrB_select (hyper to sparse) + 0.00862 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081c800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0b61900 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00895 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc072d600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (hyper to sparse) + 0.00776 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824800 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015b800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00891 sec ] + [ GrB_select (hyper to sparse) + 0.00847 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000202 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00893 sec ] + [ GrB_select (hyper to sparse) + 0.00769 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037cc00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037fa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037b200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00876 sec ] + [ GrB_select (hyper to sparse) + 0.00799 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc015eb00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0c35600 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc037e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00888 sec ] + [ GrB_select (hyper to sparse) + 0.00822 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824900 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00877 sec ] + [ GrB_select (hyper to sparse) + 0.00748 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00881 sec ] + [ GrB_select (hyper to sparse) + 0.00821 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (hyper to sparse) + 0.0073 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.016 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00726 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ad00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00851 sec ] + [ GrB_select (hyper to sparse) + 0.00802 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00877 sec ] + [ GrB_select (hyper to sparse) + 0.00722 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00861 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ac00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00851 sec ] + [ GrB_select (hyper to sparse) + 0.00799 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00887 sec ] + [ GrB_select (hyper to sparse) + 0.00722 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081ab00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00901 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081aa00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0089 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (hyper to sparse) + 0.00899 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00901 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824a00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.009 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.42e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00854 sec ] + [ GrB_select (hyper to sparse) + 0.00719 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0cff700 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00831 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0139 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00813 sec ] + [ GrB_select (hyper to sparse) + 0.00732 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00656 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824b00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0d39400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00814 sec ] + [ GrB_select (hyper to sparse) + 0.0073 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.07e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc081a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00803 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.0081 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00653 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00801 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00799 sec ] + [ GrB_select (hyper to sparse) + 0.00648 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00729 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00794 sec ] + [ GrB_select (hyper to sparse) + 0.0065 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00724 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0824c00 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00648 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000125 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0162a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00803 sec ] + [ GrB_select (hyper to sparse) + 0.00722 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00785 sec ] + [ GrB_select (hyper to sparse) + 0.00634 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00779 sec ] + [ GrB_select (hyper to sparse) + 0.00704 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0398000 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00791 sec ] + [ GrB_select (hyper to sparse) + 0.00675 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00787 sec ] + [ GrB_select (hyper to sparse) + 0.00703 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0078 sec ] + [ GrB_select (hyper to sparse) + 0.0063 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (hyper to sparse) + 0.00707 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.98e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0166900 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00786 sec ] + [ GrB_select (hyper to sparse) + 0.0062 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00772 sec ] + [ GrB_select (hyper to sparse) + 0.00692 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00766 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0736900 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0077 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00773 sec ] + [ GrB_select (hyper to sparse) + 0.00682 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.09e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016a800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00757 sec ] + [ GrB_select (hyper to sparse) + 0.00602 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.00746 sec ] + [ GrB_select (hyper to sparse) + 0.00666 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00748 sec ] + [ GrB_select (hyper to sparse) + 0.00582 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00738 sec ] + [ GrB_select (hyper to sparse) + 0.00656 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00737 sec ] + [ GrB_select (hyper to sparse) + 0.00579 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00734 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00731 sec ] + [ GrB_select (hyper to sparse) + 0.0058 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc016e700 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00739 sec ] + [ GrB_select (hyper to sparse) + 0.00646 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00562 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00634 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00722 sec ] + [ GrB_select (hyper to sparse) + 0.00557 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00722 sec ] + [ GrB_select (hyper to sparse) + 0.00632 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6a00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.0072 sec ] + [ GrB_select (hyper to sparse) + 0.00561 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6d00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0172600 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.00712 sec ] + [ GrB_select (hyper to sparse) + 0.00581 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.00701 sec ] + [ GrB_select (hyper to sparse) + 0.00533 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6b00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00529 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0176500 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00695 sec ] + [ GrB_select (hyper to sparse) + 0.00609 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6c00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00682 sec ] + [ GrB_select (hyper to sparse) + 0.00531 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017a400 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00603 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc017e300 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00531 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00685 sec ] + [ GrB_select (hyper to sparse) + 0.00599 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6e00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00532 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0182200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00691 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00677 sec ] + [ GrB_select (hyper to sparse) + 0.00523 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00674 sec ] + [ GrB_select (hyper to sparse) + 0.00579 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00515 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00668 sec ] + [ GrB_select (hyper to sparse) + 0.00588 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00663 sec ] + [ GrB_select (hyper to sparse) + 0.00512 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00662 sec ] + [ GrB_select (hyper to sparse) + 0.00592 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6f00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00662 sec ] + [ GrB_select (hyper to sparse) + 0.00512 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000113 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7200 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0186100 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0067 sec ] + [ GrB_select (hyper to sparse) + 0.00572 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0065 sec ] + [ GrB_select (hyper to sparse) + 0.00534 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00626 sec ] + [ GrB_select (hyper to sparse) + 0.00447 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00619 sec ] + [ GrB_select (hyper to sparse) + 0.00446 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0700a00 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00632 sec ] + [ GrB_select (hyper to sparse) + 0.00516 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06ffe00 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00621 sec ] + [ GrB_select (hyper to sparse) + 0.00438 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00615 sec ] + [ GrB_select (hyper to sparse) + 0.00436 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00616 sec ] + [ GrB_select (hyper to sparse) + 0.00437 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.2e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dc1d00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00615 sec ] + [ GrB_select (hyper to sparse) + 0.00397 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0358500 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00581 sec ] + [ GrB_select (hyper to sparse) + 0.00376 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00459 sec ] + [ GrB_select (hyper to sparse) + 0.00407 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc035a700 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00426 sec ] + [ GrB_select (hyper to sparse) + 0.00436 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.5e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0359e00 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00414 sec ] + [ GrB_select (hyper to sparse) + 0.00359 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.22e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7500 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018a000 shallow: 0 size: 16032 + ->i: 0x7effc0dc1d00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00414 sec ] + [ GrB_select (hyper to sparse) + 0.00307 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00344 sec ] + [ GrB_select (hyper to sparse) + 0.00263 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0decb00 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00317 sec ] + [ GrB_select (hyper to sparse) + 0.00291 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7300 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00314 sec ] + [ GrB_select (hyper to sparse) + 0.00342 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000102 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7600 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00317 sec ] + [ GrB_select (hyper to sparse) + 0.00338 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0191e00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00304 sec ] + [ GrB_select (hyper to sparse) + 0.00244 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dcf00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00307 sec ] + [ GrB_select (hyper to sparse) + 0.00283 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7400 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00345 sec ] + [ GrB_select (hyper to sparse) + 0.00311 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.14e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7700 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0dec900 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00296 sec ] + [ GrB_select + 0.00145 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014f900 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc06ffb00 shallow: 0 size: 1624 + ->p: 0x7effc06ff400 shallow: 0 size: 1632 + ->i: 0x7effc0dec900 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06d9000 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.0019 sec ] + [ GrB_select (jit: cuda load) + 0.00196 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc0dfb500 shallow: 0 size: 336 + ->p: 0x7effc0dfb300 shallow: 0 size: 344 + ->i: 0x7effc00a7400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc00a7c00 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.0014 sec ] + [ GrB_select C is empty, iso 0 + + 0.000482 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 (jit: cpu load) + 0.000268 sec ] + [ GrB_select C is empty, iso 0 + + 0.000414 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:0 gpus:0 + 4.59e-06 sec ] +all k-truss: kmax 29 + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0174 sec ] + [ GrB_Matrix_nvals + 2.16e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00991 sec ] + [ GrB_select (hyper to sparse) + 0.00865 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) (jit: cpu load) + 0.00204 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000285 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000267 sec ] + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00993 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00997 sec ] + [ GrB_select (hyper to sparse) + 0.00863 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 4.47e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00185 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000112 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00998 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0186 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ac900 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00992 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155bb00 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155bb00 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00997 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00186 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000115 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000109 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28935e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52770 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81171 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 990324 shallow: 0 total: 990324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ac500 shallow: 0 size: 324684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81170 entries, memory: 967.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00994 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0185 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52622 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81023 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 988548 shallow: 0 total: 988548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155b400 shallow: 0 size: 324092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81022 entries, memory: 965.4 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52582 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80983 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 988068 shallow: 0 total: 988068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15aa300 shallow: 0 size: 323932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80982 entries, memory: 964.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00994 sec ] + [ GrB_select (hyper to sparse) + 0.00926 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15aa100 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00924 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00967 sec ] + [ GrB_select (hyper to sparse) + 0.00924 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00182 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000161 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.22419e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52082 +bucket 2: 28280 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80363 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 980628 shallow: 0 total: 980628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 642904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a9f00 shallow: 0 size: 321452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80362 entries, memory: 957.6 KB + pending tuples: 0 max pending: 0 zombies: 38 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (wait:A 38 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.18132e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51562 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79827 + vlen: 2003 nvec_nonempty: 1928 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 974196 shallow: 0 total: 974196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 638616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1557800 shallow: 0 size: 319308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79826 entries, memory: 951.4 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0179 sec ] + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.1573e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51276 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79525 + vlen: 2003 nvec_nonempty: 1913 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 970572 shallow: 0 total: 970572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 636200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a3a00 shallow: 0 size: 318100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79524 entries, memory: 947.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00974 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14461e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51122 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79365 + vlen: 2003 nvec_nonempty: 1903 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 968652 shallow: 0 total: 968652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a2f00 shallow: 0 size: 317460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79364 entries, memory: 945.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00906 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a2300 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00977 sec ] + [ GrB_select (hyper to sparse) + 0.00831 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.00828 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1800 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00979 sec ] + [ GrB_select (hyper to sparse) + 0.00828 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1700 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00953 sec ] + [ GrB_select (hyper to sparse) + 0.00825 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1300 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (hyper to sparse) + 0.00829 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1000 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00827 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00188 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000154 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.01 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 2.09e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12704e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50958 +bucket 2: 28184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79143 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 965988 shallow: 0 total: 965988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a6600 shallow: 0 size: 316572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79142 entries, memory: 943.3 KB + pending tuples: 0 max pending: 0 zombies: 242 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 25 entries [29:53] + ... + Pending (nil) + + 0.00979 sec ] + [ GrB_select (wait:A 242 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.00653e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49476 +bucket 2: 28126 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77603 + vlen: 2003 nvec_nonempty: 1857 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 947508 shallow: 0 total: 947508 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 620824 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1550600 shallow: 0 size: 310412 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77602 entries, memory: 925.3 KB + pending tuples: 0 max pending: 0 zombies: 28 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (wait:A 28 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.95361e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48884 +bucket 2: 28032 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76917 + vlen: 2003 nvec_nonempty: 1832 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 939276 shallow: 0 total: 939276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 615336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1597900 shallow: 0 size: 307668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76916 entries, memory: 917.3 KB + pending tuples: 0 max pending: 0 zombies: 24 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (wait:A 24 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93781e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48728 +bucket 2: 27982 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76711 + vlen: 2003 nvec_nonempty: 1807 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 936804 shallow: 0 total: 936804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc154ab00 shallow: 0 size: 306844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76710 entries, memory: 914.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0093 sec ] + [ GrB_select (hyper to sparse) + 0.00835 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93122e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48664 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76625 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 935772 shallow: 0 total: 935772 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613000 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14b4500 shallow: 0 size: 306500 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76624 entries, memory: 913.8 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0096 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0174 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594c00 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00885 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594b00 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00812 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594b00 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (hyper to sparse) + 0.00886 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00184 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000177 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.98471e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49590 +bucket 2: 27730 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77321 + vlen: 2003 nvec_nonempty: 1940 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 944124 shallow: 0 total: 944124 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 618568 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15a1100 shallow: 0 size: 309284 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77320 entries, memory: 922.0 KB + pending tuples: 0 max pending: 0 zombies: 328 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00955 sec ] + [ GrB_select (wait:A 328 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0167 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.74425e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46632 +bucket 2: 27508 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74141 + vlen: 2003 nvec_nonempty: 1823 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 905964 shallow: 0 total: 905964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 593128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1545e00 shallow: 0 size: 296564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74140 entries, memory: 884.7 KB + pending tuples: 0 max pending: 0 zombies: 158 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00913 sec ] + [ GrB_select (wait:A 158 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.56e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.69752e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46054 +bucket 2: 27452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73507 + vlen: 2003 nvec_nonempty: 1660 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 898356 shallow: 0 total: 898356 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 588056 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1586900 shallow: 0 size: 294028 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73506 entries, memory: 877.3 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00936 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.68227e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45856 +bucket 2: 27442 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73299 + vlen: 2003 nvec_nonempty: 1647 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 895860 shallow: 0 total: 895860 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 586392 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153d700 shallow: 0 size: 293196 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73298 entries, memory: 874.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00904 sec ] + [ GrB_select (hyper to sparse) + 0.0078 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67525e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45770 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73203 + vlen: 2003 nvec_nonempty: 1641 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 894708 shallow: 0 total: 894708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ada00 shallow: 0 size: 292812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73202 entries, memory: 873.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00852 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67306e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73173 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 894348 shallow: 0 total: 894348 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585384 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ad900 shallow: 0 size: 292692 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73172 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00924 sec ] + [ GrB_select (hyper to sparse) + 0.0077 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ad900 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00776 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0018 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000152 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0173 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92388e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48960 +bucket 2: 27568 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76529 + vlen: 2003 nvec_nonempty: 1936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 934620 shallow: 0 total: 934620 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612232 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc159eb00 shallow: 0 size: 306116 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76528 entries, memory: 912.7 KB + pending tuples: 0 max pending: 0 zombies: 350 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 350 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0167 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.66168e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45746 +bucket 2: 27270 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73017 + vlen: 2003 nvec_nonempty: 1771 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 892476 shallow: 0 total: 892476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 584136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1542100 shallow: 0 size: 292068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73016 entries, memory: 871.6 KB + pending tuples: 0 max pending: 0 zombies: 132 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (wait:A 132 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0166 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.61855e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45198 +bucket 2: 27224 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72423 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 885348 shallow: 0 total: 885348 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 579384 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1581400 shallow: 0 size: 289692 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72422 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0153 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.61016e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45116 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72307 + vlen: 2003 nvec_nonempty: 1614 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 883956 shallow: 0 total: 883956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1539600 shallow: 0 size: 289228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72306 entries, memory: 863.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00924 sec ] + [ GrB_select (hyper to sparse) + 0.00842 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14abe00 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00921 sec ] + [ GrB_select (hyper to sparse) + 0.00765 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000114 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000151 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0182 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.79303e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47624 +bucket 2: 27172 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74797 + vlen: 2003 nvec_nonempty: 1921 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 913836 shallow: 0 total: 913836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 598376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1599a00 shallow: 0 size: 299188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74796 entries, memory: 892.4 KB + pending tuples: 0 max pending: 0 zombies: 142 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00944 sec ] + [ GrB_select (wait:A 142 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.48746e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43754 +bucket 2: 26832 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70587 + vlen: 2003 nvec_nonempty: 1634 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 863316 shallow: 0 total: 863316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 564696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153a500 shallow: 0 size: 282348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70586 entries, memory: 843.1 KB + pending tuples: 0 max pending: 0 zombies: 96 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00892 sec ] + [ GrB_select (wait:A 96 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.42123e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43058 +bucket 2: 26582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69641 + vlen: 2003 nvec_nonempty: 1565 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 851964 shallow: 0 total: 851964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 557128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1574700 shallow: 0 size: 278564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69640 entries, memory: 832.0 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0149 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39862e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42842 +bucket 2: 26472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69315 + vlen: 2003 nvec_nonempty: 1553 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 848052 shallow: 0 total: 848052 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 554520 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152e200 shallow: 0 size: 277260 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69314 entries, memory: 828.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 2.01e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1570800 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152c600 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00821 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a5500 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00935 sec ] + [ GrB_select (hyper to sparse) + 0.00741 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00178 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000154 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.65381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46018 +bucket 2: 26890 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72909 + vlen: 2003 nvec_nonempty: 1883 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 891180 shallow: 0 total: 891180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 583272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1594200 shallow: 0 size: 291636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72908 entries, memory: 870.3 KB + pending tuples: 0 max pending: 0 zombies: 198 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 18 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 17 + row 167: 22 + row 173: 8 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 13 + ... + Pending (nil) + + 0.00934 sec ] + [ GrB_select (wait:A 198 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.29674e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41694 +bucket 2: 26132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67827 + vlen: 2003 nvec_nonempty: 1598 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 830196 shallow: 0 total: 830196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 542616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1531300 shallow: 0 size: 271308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67826 entries, memory: 810.7 KB + pending tuples: 0 max pending: 0 zombies: 102 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (wait:A 102 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0146 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.22885e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41008 +bucket 2: 25808 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66817 + vlen: 2003 nvec_nonempty: 1496 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 818076 shallow: 0 total: 818076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 534536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1566b00 shallow: 0 size: 267268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66816 entries, memory: 798.9 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00912 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21872e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40988 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66665 + vlen: 2003 nvec_nonempty: 1492 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 816252 shallow: 0 total: 816252 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533320 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1523600 shallow: 0 size: 266660 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66664 entries, memory: 797.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00921 sec ] + [ GrB_select (hyper to sparse) + 0.00796 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21619e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40970 +bucket 2: 25656 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66627 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815796 shallow: 0 total: 815796 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533016 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0d00 shallow: 0 size: 266508 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66626 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00921 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0c00 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0c00 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0b00 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00875 sec ] + [ GrB_select (hyper to sparse) + 0.00718 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0a00 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00928 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0900 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0800 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00714 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0600 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00717 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0500 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00927 sec ] + [ GrB_select (hyper to sparse) + 0.00714 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0400 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0300 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00882 sec ] + [ GrB_select (hyper to sparse) + 0.00715 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14a0300 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00878 sec ] + [ GrB_select (hyper to sparse) + 0.00714 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00176 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000181 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0169 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.28105e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41598 +bucket 2: 25996 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67595 + vlen: 2003 nvec_nonempty: 1872 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 827412 shallow: 0 total: 827412 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 540760 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1584900 shallow: 0 size: 270380 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67594 entries, memory: 808.0 KB + pending tuples: 0 max pending: 0 zombies: 392 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00897 sec ] + [ GrB_select (wait:A 392 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.014 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.83985e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35884 +bucket 2: 24822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60707 + vlen: 2003 nvec_nonempty: 1405 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 744756 shallow: 0 total: 744756 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 485656 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1518900 shallow: 0 size: 242828 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60706 entries, memory: 727.3 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0144 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77961e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35146 +bucket 2: 24558 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59705 + vlen: 2003 nvec_nonempty: 1187 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 732732 shallow: 0 total: 732732 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 477640 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1544100 shallow: 0 size: 238820 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59704 entries, memory: 715.6 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00858 sec ] + [ GrB_select (hyper to sparse) + 0.00659 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.76106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34938 +bucket 2: 24454 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59393 + vlen: 2003 nvec_nonempty: 1179 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 728988 shallow: 0 total: 728988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 475144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1541e00 shallow: 0 size: 237572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59392 entries, memory: 711.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00733 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75147e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34870 +bucket 2: 24360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59231 + vlen: 2003 nvec_nonempty: 1171 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 727044 shallow: 0 total: 727044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1540a00 shallow: 0 size: 236924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59230 entries, memory: 710.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153fd00 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00853 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0021 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.56e-05 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000143 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0161 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2382e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41186 +bucket 2: 25770 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66957 + vlen: 2003 nvec_nonempty: 1842 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 819756 shallow: 0 total: 819756 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 535656 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1582b00 shallow: 0 size: 267828 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66956 entries, memory: 800.5 KB + pending tuples: 0 max pending: 0 zombies: 366 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 13 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00898 sec ] + [ GrB_select (wait:A 366 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0155 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.79826e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35436 +bucket 2: 24580 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60017 + vlen: 2003 nvec_nonempty: 1381 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 736476 shallow: 0 total: 736476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 480136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1515f00 shallow: 0 size: 240068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60016 entries, memory: 719.2 KB + pending tuples: 0 max pending: 0 zombies: 44 + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (wait:A 44 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0146 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72003e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34440 +bucket 2: 24256 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58697 + vlen: 2003 nvec_nonempty: 1180 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 720636 shallow: 0 total: 720636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 469576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc153fe00 shallow: 0 size: 234788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58696 entries, memory: 703.7 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0137 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6962e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34220 +bucket 2: 24068 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58289 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 715740 shallow: 0 total: 715740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 466312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 233156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58288 entries, memory: 699.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00844 sec ] + [ GrB_select (hyper to sparse) + 0.0072 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34064 +bucket 2: 23954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58019 + vlen: 2003 nvec_nonempty: 1162 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 712500 shallow: 0 total: 712500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 464152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490000 shallow: 0 size: 232076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58018 entries, memory: 695.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00845 sec ] + [ GrB_select (hyper to sparse) + 0.00714 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67231e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34000 +bucket 2: 23876 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57877 + vlen: 2003 nvec_nonempty: 1157 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 710796 shallow: 0 total: 710796 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 463016 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148fb00 shallow: 0 size: 231508 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57876 entries, memory: 694.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00643 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66838e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33952 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57809 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709980 shallow: 0 total: 709980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f900 shallow: 0 size: 231236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57808 entries, memory: 693.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6655e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33932 +bucket 2: 23826 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57759 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709380 shallow: 0 total: 709380 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462072 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 231036 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57758 entries, memory: 692.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66388e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33928 +bucket 2: 23802 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57731 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 709044 shallow: 0 total: 709044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 230924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57730 entries, memory: 692.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00819 sec ] + [ GrB_select (hyper to sparse) + 0.0064 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66261e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33910 +bucket 2: 23798 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57709 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708780 shallow: 0 total: 708780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57708 entries, memory: 692.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66215e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33922 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57701 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708684 shallow: 0 total: 708684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57700 entries, memory: 692.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00843 sec ] + [ GrB_select (hyper to sparse) + 0.00638 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00841 sec ] + [ GrB_select (hyper to sparse) + 0.00639 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00207 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.57e-05 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0168 sec ] + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.15672e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40338 +bucket 2: 25388 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 65727 + vlen: 2003 nvec_nonempty: 1822 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 804996 shallow: 0 total: 804996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 525816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc157f000 shallow: 0 size: 262908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 65726 entries, memory: 786.1 KB + pending tuples: 0 max pending: 0 zombies: 362 + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 10 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 10 + row 9: 19 + row 10: 19 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 14 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 10 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.00901 sec ] + [ GrB_select (wait:A 362 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 2.16e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71534e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34398 +bucket 2: 24218 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58617 + vlen: 2003 nvec_nonempty: 1305 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 719676 shallow: 0 total: 719676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 468936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1510d00 shallow: 0 size: 234468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58616 entries, memory: 702.8 KB + pending tuples: 0 max pending: 0 zombies: 20 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00852 sec ] + [ GrB_select (wait:A 20 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.6248e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33306 +bucket 2: 23742 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57049 + vlen: 2003 nvec_nonempty: 1155 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 700860 shallow: 0 total: 700860 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 456392 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1538500 shallow: 0 size: 228196 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57048 entries, memory: 684.4 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00833 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0144 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.5773e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32858 +bucket 2: 23350 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56209 + vlen: 2003 nvec_nonempty: 1134 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 690780 shallow: 0 total: 690780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 449672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fbe00 shallow: 0 size: 224836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56208 entries, memory: 674.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0081 sec ] + [ GrB_select (hyper to sparse) + 0.00674 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54837e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32788 +bucket 2: 22902 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55691 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 684564 shallow: 0 total: 684564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 445528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b700 shallow: 0 size: 222764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55690 entries, memory: 668.5 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00803 sec ] + [ GrB_select (hyper to sparse) + 0.0062 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53904e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32660 +bucket 2: 22862 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55523 + vlen: 2003 nvec_nonempty: 1107 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 682548 shallow: 0 total: 682548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 444184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b200 shallow: 0 size: 222092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55522 entries, memory: 666.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.008 sec ] + [ GrB_select (hyper to sparse) + 0.00695 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00811 sec ] + [ GrB_select (hyper to sparse) + 0.00618 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00197 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.58e-05 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000142 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.05966e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 39348 +bucket 2: 24882 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 64231 + vlen: 2003 nvec_nonempty: 1805 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 787044 shallow: 0 total: 787044 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 513848 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc157ab00 shallow: 0 size: 256924 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 64230 entries, memory: 768.6 KB + pending tuples: 0 max pending: 0 zombies: 414 + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 6 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 9 + row 9: 19 + row 10: 19 + row 11: 18 + row 156: 17 + row 157: 17 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 22 entries [21:42] + row 0: 20 + row 2: 6 + row 3: 19 + row 4: 18 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 9 + ... + Pending (nil) + + 0.00887 sec ] + [ GrB_select (wait:A 414 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.58371e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32850 +bucket 2: 23472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56323 + vlen: 2003 nvec_nonempty: 1164 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 692148 shallow: 0 total: 692148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 450584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1509600 shallow: 0 size: 225292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56322 entries, memory: 675.9 KB + pending tuples: 0 max pending: 0 zombies: 8 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00831 sec ] + [ GrB_select (wait:A 8 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0129 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.48377e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32068 +bucket 2: 22448 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54517 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 670476 shallow: 0 total: 670476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 436136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc152c600 shallow: 0 size: 218068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54516 entries, memory: 654.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00801 sec ] + [ GrB_select (hyper to sparse) + 0.00684 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.43283e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31686 +bucket 2: 21886 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53573 + vlen: 2003 nvec_nonempty: 1090 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 659148 shallow: 0 total: 659148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 428584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1528000 shallow: 0 size: 214292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53572 entries, memory: 643.7 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00784 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41661e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31630 +bucket 2: 21638 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53269 + vlen: 2003 nvec_nonempty: 1078 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 655500 shallow: 0 total: 655500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 426152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1524100 shallow: 0 size: 213076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53268 entries, memory: 640.1 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00779 sec ] + [ GrB_select (hyper to sparse) + 0.00603 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1522c00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00781 sec ] + [ GrB_select (hyper to sparse) + 0.006 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00191 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.36e-05 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0165 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.92656e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 37642 +bucket 2: 24478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 62121 + vlen: 2003 nvec_nonempty: 1774 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 761724 shallow: 0 total: 761724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 496968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1574800 shallow: 0 size: 248484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 62120 entries, memory: 743.9 KB + pending tuples: 0 max pending: 0 zombies: 426 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 13 + row 163: 12 + row 165: 8 + row 166: 10 + row 167: 18 + column: 1 : 21 entries [19:39] + row 0: 18 + row 2: 1 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (wait:A 426 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0138 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45722e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31526 +bucket 2: 22500 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54027 + vlen: 2003 nvec_nonempty: 1135 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 664596 shallow: 0 total: 664596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 432216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1500b00 shallow: 0 size: 216108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54026 entries, memory: 649.0 KB + pending tuples: 0 max pending: 0 zombies: 20 + + column: 0 : 14 entries [0:13] + row 1: 13 + row 3: 11 + row 4: 13 + row 5: 13 + row 6: 13 + row 7: 13 + row 9: 13 + row 10: 13 + row 156: 13 + row 157: 13 + row 159: 12 + row 160: 12 + row 161: 13 + row 167: 13 + column: 1 : 14 entries [14:27] + row 0: 13 + row 3: 11 + row 4: 13 + row 5: 13 + row 6: 13 + row 7: 13 + row 9: 13 + row 10: 13 + row 156: 13 + row 157: 13 + row 159: 12 + row 160: 12 + row 161: 13 + row 167: 13 + column: 2 : 0 entries [28:27] + column: 3 : 12 entries [28:39] + row 0: 11 + ... + Pending (nil) + + 0.00811 sec ] + [ GrB_select (wait:A 20 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0127 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.33568e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30944 +bucket 2: 20780 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51725 + vlen: 2003 nvec_nonempty: 1075 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 636972 shallow: 0 total: 636972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 413800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc151fc00 shallow: 0 size: 206900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51724 entries, memory: 622.0 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00769 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0117 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.27432e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30334 +bucket 2: 20188 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50523 + vlen: 2003 nvec_nonempty: 1026 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 622548 shallow: 0 total: 622548 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 404184 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14e6600 shallow: 0 size: 202092 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50522 entries, memory: 608.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00765 sec ] + [ GrB_select (hyper to sparse) + 0.00624 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25143e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30100 +bucket 2: 19966 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50067 + vlen: 2003 nvec_nonempty: 1012 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 617076 shallow: 0 total: 617076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 400536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480700 shallow: 0 size: 200268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50066 entries, memory: 602.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00758 sec ] + [ GrB_select (hyper to sparse) + 0.00641 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24254e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29946 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49889 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 614940 shallow: 0 total: 614940 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 399112 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480200 shallow: 0 size: 199556 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49888 entries, memory: 600.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00755 sec ] + [ GrB_select (hyper to sparse) + 0.00572 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00751 sec ] + [ GrB_select (hyper to sparse) + 0.00646 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00749 sec ] + [ GrB_select (hyper to sparse) + 0.0057 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00187 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.05e-05 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000146 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.82328e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36250 +bucket 2: 24182 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 60433 + vlen: 2003 nvec_nonempty: 1754 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 741468 shallow: 0 total: 741468 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 483464 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc156f900 shallow: 0 size: 241732 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 60432 entries, memory: 724.1 KB + pending tuples: 0 max pending: 0 zombies: 428 + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 13 + row 163: 12 + row 165: 8 + row 166: 10 + row 167: 18 + column: 1 : 21 entries [19:39] + row 0: 18 + row 2: 1 + row 3: 16 + row 4: 17 + row 5: 16 + row 6: 18 + row 7: 18 + row 9: 16 + row 10: 16 + row 11: 14 + ... + Pending (nil) + + 0.00863 sec ] + [ GrB_select (wait:A 428 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.33754e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 21174 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51761 + vlen: 2003 nvec_nonempty: 1108 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 637404 shallow: 0 total: 637404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 414088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14c6700 shallow: 0 size: 207044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51760 entries, memory: 622.5 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 14 entries [0:13] + row 1: 13 + row 3: 10 + row 4: 11 + row 5: 6 + row 6: 13 + row 7: 13 + row 9: 12 + row 10: 12 + row 156: 12 + row 157: 12 + row 159: 10 + row 160: 10 + row 161: 13 + row 167: 13 + column: 1 : 14 entries [14:27] + row 0: 13 + row 3: 10 + row 4: 11 + row 5: 6 + row 6: 13 + row 7: 13 + row 9: 12 + row 10: 12 + row 156: 12 + row 157: 12 + row 159: 10 + row 160: 10 + row 161: 13 + row 167: 13 + column: 2 : 0 entries [28:27] + column: 3 : 11 entries [28:38] + row 0: 10 + ... + Pending (nil) + + 0.00799 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0124 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.19303e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29410 +bucket 2: 19474 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48885 + vlen: 2003 nvec_nonempty: 999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 602892 shallow: 0 total: 602892 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 391080 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147e200 shallow: 0 size: 195540 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48884 entries, memory: 588.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00747 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0116 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13995e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28554 +bucket 2: 19230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47785 + vlen: 2003 nvec_nonempty: 954 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 589692 shallow: 0 total: 589692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 382280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150a300 shallow: 0 size: 191140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47784 entries, memory: 575.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00744 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13004e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28380 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47577 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 587196 shallow: 0 total: 587196 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 380616 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1507e00 shallow: 0 size: 190308 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47576 entries, memory: 573.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00739 sec ] + [ GrB_select (hyper to sparse) + 0.00607 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1506e00 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00741 sec ] + [ GrB_select (hyper to sparse) + 0.00631 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00181 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.18e-05 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000262 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55906e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32398 +bucket 2: 23484 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55883 + vlen: 2003 nvec_nonempty: 1562 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 686868 shallow: 0 total: 686868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 447064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1562400 shallow: 0 size: 223532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55882 entries, memory: 670.8 KB + pending tuples: 0 max pending: 0 zombies: 290 + + column: 0 : 18 entries [0:17] + row 1: 16 + row 3: 13 + row 4: 14 + row 5: 12 + row 6: 16 + row 7: 16 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + row 157: 15 + row 159: 12 + row 160: 12 + row 161: 17 + row 162: 10 + row 163: 9 + row 166: 6 + row 167: 17 + column: 1 : 20 entries [18:37] + row 0: 16 + row 2: 1 + row 3: 13 + row 4: 13 + row 5: 11 + row 6: 16 + row 7: 16 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + ... + Pending (nil) + + 0.00833 sec ] + [ GrB_select (wait:A 290 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0127 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.17494e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28792 +bucket 2: 19720 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48513 + vlen: 2003 nvec_nonempty: 1059 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 598428 shallow: 0 total: 598428 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 388104 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14baa00 shallow: 0 size: 194052 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48512 entries, memory: 584.4 KB + pending tuples: 0 max pending: 0 zombies: 12 + + column: 0 : 2 entries [0:1] + row 161: 1 + row 167: 1 + column: 1 : 2 entries [2:3] + row 161: 1 + row 167: 1 + column: 2 : 0 entries [4:3] + column: 3 : 0 entries [4:3] + column: 4 : 0 entries [4:3] + column: 5 : 0 entries [4:3] + column: 6 : 18 entries [4:21] + row 7: 17 + row 9: 16 + row 10: 16 + row 11: 16 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 13 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [22:39] + row 6: 17 + row 9: 16 + row 10: 16 + row 11: 16 + row 12: 17 + row 13: 17 + row 15: 17 + ... + Pending (nil) + + 0.00767 sec ] + [ GrB_select (wait:A 12 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0114 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02523e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26864 +bucket 2: 18452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45317 + vlen: 2003 nvec_nonempty: 880 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 560076 shallow: 0 total: 560076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 362536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1477300 shallow: 0 size: 181268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45316 entries, memory: 546.9 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 13 entries [0:12] + row 7: 12 + row 12: 12 + row 13: 12 + row 15: 12 + row 16: 12 + row 17: 12 + row 150: 12 + row 151: 12 + row 155: 12 + row 156: 12 + row 159: 12 + row 160: 12 + row 161: 12 + column: 7 : 13 entries [13:25] + row 6: 12 + row 12: 12 + row 13: 12 + row 15: 12 + row 16: 12 + row 17: 12 + row 150: 12 + row 151: 12 + row 155: 12 + row 156: 12 + row 159: 12 + row 160: 12 + row 161: 12 + column: 8 : 0 entries [26:25] + column: 9 : 0 entries [26:25] + column: 10 : 0 entries [26:25] + ... + Pending (nil) + + 0.00716 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0113 sec ] + [ GrB_Matrix_nvals + 2.24e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00291e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26452 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44821 + vlen: 2003 nvec_nonempty: 835 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 554124 shallow: 0 total: 554124 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358568 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fa900 shallow: 0 size: 179284 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44820 entries, memory: 541.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00708 sec ] + [ GrB_select (hyper to sparse) + 0.0053 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14f9800 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00704 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00167 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.11e-05 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000126 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.49359e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 23110 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54697 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 672636 shallow: 0 total: 672636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 437576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc155ec00 shallow: 0 size: 218788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54696 entries, memory: 656.9 KB + pending tuples: 0 max pending: 0 zombies: 274 + + column: 0 : 18 entries [0:17] + row 1: 15 + row 3: 12 + row 4: 14 + row 5: 12 + row 6: 15 + row 7: 15 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 15 + row 157: 15 + row 159: 12 + row 160: 12 + row 161: 17 + row 162: 7 + row 163: 8 + row 166: 6 + row 167: 17 + column: 1 : 17 entries [18:34] + row 0: 15 + row 2: 1 + row 3: 12 + row 4: 12 + row 5: 10 + row 6: 15 + row 7: 15 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + ... + Pending (nil) + + 0.00817 sec ] + [ GrB_select (wait:A 274 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0125 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.0822e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28032 +bucket 2: 18526 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46559 + vlen: 2003 nvec_nonempty: 1023 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 574980 shallow: 0 total: 574980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 372472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14e3e00 shallow: 0 size: 186236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46558 entries, memory: 561.5 KB + pending tuples: 0 max pending: 0 zombies: 28 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 15 entries [0:14] + row 7: 14 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 9 + row 13: 9 + row 15: 9 + row 16: 9 + row 150: 11 + row 155: 11 + row 156: 10 + row 157: 10 + row 159: 14 + row 160: 14 + row 161: 14 + column: 7 : 15 entries [15:29] + row 6: 14 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 9 + row 13: 9 + row 15: 9 + row 16: 9 + row 150: 11 + row 155: 11 + row 156: 10 + row 157: 10 + row 159: 14 + row 160: 14 + ... + Pending (nil) + + 0.00743 sec ] + [ GrB_select (wait:A 28 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0118 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:990598 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26420 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44545 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550812 shallow: 0 total: 550812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 356360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fc100 shallow: 0 size: 178180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44544 entries, memory: 537.9 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00707 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0105 sec ] + [ GrB_Matrix_nvals + 2.09e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cc800 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00711 sec ] + [ GrB_select (hyper to sparse) + 0.00601 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 4.47e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00169 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.25e-05 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0151 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42226e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 22788 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53375 + vlen: 2003 nvec_nonempty: 1509 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 656772 shallow: 0 total: 656772 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 427000 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486e00 shallow: 0 size: 213500 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53374 entries, memory: 641.4 KB + pending tuples: 0 max pending: 0 zombies: 244 + + column: 0 : 13 entries [0:12] + row 1: 8 + row 3: 6 + row 4: 10 + row 5: 4 + row 6: 8 + row 7: 8 + row 9: 8 + row 10: 8 + row 156: 8 + row 157: 8 + row 162: 5 + row 166: 3 + row 167: 6 + column: 1 : 10 entries [13:22] + row 0: 8 + row 3: 6 + row 4: 6 + row 6: 8 + row 7: 8 + row 9: 8 + row 10: 8 + row 156: 7 + row 157: 7 + row 163: 2 + column: 2 : 0 entries [23:22] + column: 3 : 7 entries [23:29] + row 0: 6 + row 1: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (wait:A 244 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0122 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02089e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26828 +bucket 2: 18392 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45221 + vlen: 2003 nvec_nonempty: 954 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 558924 shallow: 0 total: 558924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 361768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150b000 shallow: 0 size: 180884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45220 entries, memory: 545.8 KB + pending tuples: 0 max pending: 0 zombies: 26 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 3 entries [0:2] + row 7: 2 + row 9: 2 + row 10: 2 + column: 7 : 3 entries [3:5] + row 6: 2 + row 9: 2 + row 10: 2 + column: 8 : 0 entries [6:5] + column: 9 : 3 entries [6:8] + row 6: 2 + row 7: 2 + row 10: 2 + column: 10 : 3 entries [9:11] + row 6: 2 + row 7: 2 + row 9: 2 + ... + Pending (nil) + + 0.0071 sec ] + [ GrB_select (wait:A 26 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0113 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cdd00 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00705 sec ] + [ GrB_select (hyper to sparse) + 0.00602 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00174 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.04e-05 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000126 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.377e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30114 +bucket 2: 22404 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52519 + vlen: 2003 nvec_nonempty: 1497 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 646500 shallow: 0 total: 646500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 420152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1485400 shallow: 0 size: 210076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52518 entries, memory: 631.3 KB + pending tuples: 0 max pending: 0 zombies: 234 + + column: 0 : 11 entries [0:10] + row 1: 6 + row 3: 6 + row 4: 10 + row 5: 4 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + row 162: 3 + row 166: 3 + row 167: 4 + column: 1 : 8 entries [11:18] + row 0: 6 + row 3: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + row 163: zombie + column: 2 : 0 entries [19:18] + column: 3 : 7 entries [19:25] + row 0: 6 + row 1: 6 + row 4: 6 + row 6: 6 + row 7: 6 + row 9: 6 + row 10: 6 + column: 4 : 11 entries [26:36] + row 0: 10 + row 1: 6 + row 3: 6 + ... + Pending (nil) + + 0.00782 sec ] + [ GrB_select (wait:A 234 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0121 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00479e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26616 +bucket 2: 18246 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44863 + vlen: 2003 nvec_nonempty: 917 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 554628 shallow: 0 total: 554628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1508300 shallow: 0 size: 179452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44862 entries, memory: 541.6 KB + pending tuples: 0 max pending: 0 zombies: 24 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 1 entries [0:0] + row 7: zombie + column: 7 : 1 entries [1:1] + row 6: zombie + column: 8 : 0 entries [2:1] + column: 9 : 0 entries [2:1] + column: 10 : 0 entries [2:1] + ... + Pending (nil) + + 0.00709 sec ] + [ GrB_select (wait:A 24 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0104 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14cd100 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00704 sec ] + [ GrB_select (hyper to sparse) + 0.00598 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00171 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.14e-05 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00013 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.31715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29374 +bucket 2: 21990 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 51365 + vlen: 2003 nvec_nonempty: 1453 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 632652 shallow: 0 total: 632652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 410920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1483000 shallow: 0 size: 205460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 51364 entries, memory: 617.8 KB + pending tuples: 0 max pending: 0 zombies: 200 + + column: 0 : 8 entries [0:7] + row 1: 2 + row 4: 3 + row 5: 2 + row 6: 2 + row 7: 2 + row 162: 2 + row 166: 1 + row 167: 4 + column: 1 : 5 entries [8:12] + row 0: 2 + row 3: zombie + row 6: 2 + row 7: 2 + row 163: zombie + column: 2 : 0 entries [13:12] + column: 3 : 1 entries [13:13] + row 1: zombie + column: 4 : 4 entries [14:17] + row 0: 3 + row 5: 2 + row 162: 2 + row 167: 3 + column: 5 : 3 entries [18:20] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 16 entries [21:36] + row 0: 2 + row 1: 2 + row 7: 15 + row 8: 3 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 4 + ... + Pending (nil) + + 0.00775 sec ] + [ GrB_select (wait:A 200 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0119 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:977035 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26282 +bucket 2: 17956 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44239 + vlen: 2003 nvec_nonempty: 869 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 547140 shallow: 0 total: 547140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 353912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1504400 shallow: 0 size: 176956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44238 entries, memory: 534.3 KB + pending tuples: 0 max pending: 0 zombies: 8 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.007 sec ] + [ GrB_select (wait:A 8 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0111 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:938723 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25688 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43363 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 536628 shallow: 0 total: 536628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 346904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14c9d00 shallow: 0 size: 173452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43362 entries, memory: 524.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00693 sec ] + [ GrB_select (hyper to sparse) + 0.00587 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472b00 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00688 sec ] + [ GrB_select (hyper to sparse) + 0.00575 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472200 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00683 sec ] + [ GrB_select (hyper to sparse) + 0.00578 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472200 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00682 sec ] + [ GrB_select (hyper to sparse) + 0.0051 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1472100 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00508 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00162 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.89e-05 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00012 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0156 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23111e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29440 +bucket 2: 20218 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49659 + vlen: 2003 nvec_nonempty: 1421 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 612180 shallow: 0 total: 612180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fa00 shallow: 0 size: 198636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49658 entries, memory: 597.8 KB + pending tuples: 0 max pending: 0 zombies: 180 + + column: 0 : 8 entries [0:7] + row 1: 2 + row 4: 3 + row 5: 2 + row 6: 2 + row 7: 2 + row 162: 2 + row 166: 1 + row 167: 4 + column: 1 : 4 entries [8:11] + row 0: 2 + row 6: 2 + row 7: 2 + row 163: zombie + column: 2 : 0 entries [12:11] + column: 3 : 0 entries [12:11] + column: 4 : 4 entries [12:15] + row 0: 3 + row 5: 2 + row 162: 2 + row 167: 3 + column: 5 : 3 entries [16:18] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 15 entries [19:33] + row 0: 2 + row 1: 2 + row 7: 14 + row 8: 1 + row 9: 8 + row 10: 8 + row 11: 8 + row 12: 3 + row 13: 3 + row 150: 8 + ... + Pending (nil) + + 0.00764 sec ] + [ GrB_select (wait:A 180 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0114 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:751128 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24834 +bucket 2: 13954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 38789 + vlen: 2003 nvec_nonempty: 831 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 481740 shallow: 0 total: 481740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 310312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14f1100 shallow: 0 size: 155156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 38788 entries, memory: 470.4 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00653 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0104 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:636932 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23340 +bucket 2: 12378 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35719 + vlen: 2003 nvec_nonempty: 730 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 444900 shallow: 0 total: 444900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 285752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14b0300 shallow: 0 size: 142876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35718 entries, memory: 434.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00633 sec ] + [ GrB_select (hyper to sparse) + 0.00454 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:620984 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23360 +bucket 2: 11908 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35269 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 439500 shallow: 0 total: 439500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 282152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463900 shallow: 0 size: 141076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35268 entries, memory: 429.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00633 sec ] + [ GrB_select (hyper to sparse) + 0.00444 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612911 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23636 +bucket 2: 11402 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35039 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436740 shallow: 0 total: 436740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 280312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463100 shallow: 0 size: 140156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35038 entries, memory: 426.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00635 sec ] + [ GrB_select (hyper to sparse) + 0.00517 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611653 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23678 +bucket 2: 11324 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35003 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436308 shallow: 0 total: 436308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 280024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463000 shallow: 0 size: 140012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35002 entries, memory: 426.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00644 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463000 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00627 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1462f00 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00627 sec ] + [ GrB_select (hyper to sparse) + 0.00439 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00147 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.72e-05 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000109 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:863070 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29724 +bucket 2: 11854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 41579 + vlen: 2003 nvec_nonempty: 1368 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 515220 shallow: 0 total: 515220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 332632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146fe00 shallow: 0 size: 166316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 41578 entries, memory: 503.1 KB + pending tuples: 0 max pending: 0 zombies: 128 + + column: 0 : 4 entries [0:3] + row 4: 2 + row 5: 2 + row 162: 1 + row 167: 3 + column: 1 : 0 entries [4:3] + column: 2 : 0 entries [4:3] + column: 3 : 0 entries [4:3] + column: 4 : 3 entries [4:6] + row 0: 2 + row 5: 2 + row 167: 2 + column: 5 : 3 entries [7:9] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 9 entries [10:18] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [19:27] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [28:27] + column: 9 : 9 entries [28:36] + row 6: 8 + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (wait:A 128 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00971 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:367034 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26862 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 27115 + vlen: 2003 nvec_nonempty: 713 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 341652 shallow: 0 total: 341652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 216920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1453a00 shallow: 0 size: 108460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 27114 entries, memory: 333.6 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00545 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00805 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484b00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00423 sec ] + [ GrB_select (hyper to sparse) + 0.0043 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00128 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000196 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.95e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0162 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825705 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29092 +bucket 2: 11576 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40669 + vlen: 2003 nvec_nonempty: 1347 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 504300 shallow: 0 total: 504300 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 325352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146e100 shallow: 0 size: 162676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40668 entries, memory: 492.5 KB + pending tuples: 0 max pending: 0 zombies: 98 + + column: 0 : 3 entries [0:2] + row 4: 2 + row 5: 2 + row 167: 2 + column: 1 : 0 entries [3:2] + column: 2 : 0 entries [3:2] + column: 3 : 0 entries [3:2] + column: 4 : 3 entries [3:5] + row 0: 2 + row 5: 2 + row 167: 2 + column: 5 : 3 entries [6:8] + row 0: 2 + row 4: 2 + row 167: 2 + column: 6 : 9 entries [9:17] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [18:26] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [27:26] + column: 9 : 9 entries [27:35] + row 6: 8 + row 7: 8 + ... + Pending (nil) + + 0.0068 sec ] + [ GrB_select (wait:A 98 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00926 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:366168 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26830 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 27083 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 341268 shallow: 0 total: 341268 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 216664 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14bd500 shallow: 0 size: 108332 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 27082 entries, memory: 333.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00546 sec ] + [ GrB_select (hyper to sparse) + 0.00359 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc144fb00 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00422 sec ] + [ GrB_select (hyper to sparse) + 0.00428 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00122 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.47e-05 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000109 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0157 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:625854 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28470 +bucket 2: 6936 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35407 + vlen: 2003 nvec_nonempty: 1290 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 441156 shallow: 0 total: 441156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 283256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1463d00 shallow: 0 size: 141628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35406 entries, memory: 430.8 KB + pending tuples: 0 max pending: 0 zombies: 86 + + column: 0 : 1 entries [0:0] + row 4: zombie + column: 1 : 0 entries [1:0] + column: 2 : 0 entries [1:0] + column: 3 : 0 entries [1:0] + column: 4 : 1 entries [1:1] + row 0: zombie + column: 5 : 0 entries [2:1] + column: 6 : 9 entries [2:10] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [11:19] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [20:19] + column: 9 : 9 entries [20:28] + row 6: 8 + row 7: 8 + row 10: 8 + row 11: 8 + row 156: 8 + row 157: 8 + row 159: 8 + row 160: 8 + row 161: 8 + column: 10 : 9 entries [29:37] + ... + Pending (nil) + + 0.00646 sec ] + [ GrB_select (wait:A 86 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00833 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:152477 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17224 +bucket 2: 252 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17477 + vlen: 2003 nvec_nonempty: 601 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 225996 shallow: 0 total: 225996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a9e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 139816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 69908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17476 entries, memory: 220.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00462 sec ] + [ GrB_select (hyper to sparse) + 0.00259 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106712 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14620 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14621 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 191724 shallow: 0 total: 191724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 116968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 58484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14620 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00323 sec ] + [ GrB_select (hyper to sparse) + 0.00332 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00348 sec ] + [ GrB_select (hyper to sparse) + 0.0025 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000948 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.7e-05 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.87e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0102 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0138 sec ] + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:585584 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28216 +bucket 2: 6032 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34249 + vlen: 2003 nvec_nonempty: 1133 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 427260 shallow: 0 total: 427260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 273992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1461900 shallow: 0 size: 136996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34248 entries, memory: 417.2 KB + pending tuples: 0 max pending: 0 zombies: 68 + + column: 0 : 1 entries [0:0] + row 4: zombie + column: 1 : 0 entries [1:0] + column: 2 : 0 entries [1:0] + column: 3 : 0 entries [1:0] + column: 4 : 1 entries [1:1] + row 0: zombie + column: 5 : 0 entries [2:1] + column: 6 : 9 entries [2:10] + row 7: 8 + row 9: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 7 : 9 entries [11:19] + row 6: 8 + row 9: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 8 : 0 entries [20:19] + column: 9 : 9 entries [20:28] + row 6: 8 + row 7: 8 + row 10: 8 + row 11: 6 + row 156: 7 + row 157: 7 + row 159: 8 + row 160: 8 + row 161: 8 + column: 10 : 9 entries [29:37] + ... + Pending (nil) + + 0.00638 sec ] + [ GrB_select (wait:A 68 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00788 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:137143 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 16418 +bucket 2: 156 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 16575 + vlen: 2003 nvec_nonempty: 597 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 215172 shallow: 0 total: 215172 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 132600 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 66300 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 16574 entries, memory: 210.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00409 sec ] + [ GrB_select (hyper to sparse) + 0.00239 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:78332.8 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12526 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12527 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 166596 shallow: 0 total: 166596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc02f3c00 shallow: 0 size: 100216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0698c00 shallow: 0 size: 50108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12526 entries, memory: 162.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00298 sec ] + [ GrB_select (hyper to sparse) + 0.00278 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc141ea00 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00295 sec ] + [ GrB_select (hyper to sparse) + 0.00235 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000909 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.13e-05 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.53e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06d9000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0101 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0141 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418713 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28960 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28961 + vlen: 2003 nvec_nonempty: 1107 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 363804 shallow: 0 total: 363804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc018df00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 231688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1457400 shallow: 0 size: 115844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28960 entries, memory: 355.3 KB + pending tuples: 0 max pending: 0 zombies: 64 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 9 entries [0:8] + row 7: 8 + row 9: 6 + row 10: 6 + row 11: 4 + row 156: 7 + row 157: 7 + row 159: 5 + row 160: 5 + row 161: 8 + column: 7 : 9 entries [9:17] + row 6: 8 + row 9: 6 + row 10: 6 + row 11: 4 + row 156: 7 + row 157: 7 + row 159: 5 + row 160: 5 + row 161: 8 + column: 8 : 0 entries [18:17] + column: 9 : 7 entries [18:24] + row 6: 6 + row 7: 6 + row 10: 6 + row 11: 4 + row 156: 5 + row 157: 5 + row 161: 6 + column: 10 : 7 entries [25:31] + row 6: 6 + row 7: 6 + row 9: 6 + row 11: 4 + ... + Pending (nil) + + 0.00454 sec ] + [ GrB_select (wait:A 64 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.00649 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:14601.3 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 5408 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 5409 + vlen: 2003 nvec_nonempty: 543 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 81180 shallow: 0 total: 81180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df7200 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 43272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc06a3600 shallow: 0 size: 21636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 5408 entries, memory: 79.3 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00163 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) C is empty, iso 0 + + 0.00288 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 + 0.000161 sec ] + [ GrB_select C is empty, iso 0 + + 0.000374 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=H.*H) (jit: cpu load) + 0.000556 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:0 gpus:0 + 4.95e-06 sec ] + [ GrB_reduce work:0 gpus:0 + 4.45e-06 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14be900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00997 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.018 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06dd000 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df4700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00863 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000109 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00961 sec ] + [ GrB_select (hyper to sparse) + 0.00871 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7900 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0698c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.0086 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00975 sec ] + [ GrB_select (hyper to sparse) + 0.00857 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc069cb00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc15ae300 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0172 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (hyper to sparse) + 0.00852 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc069cb00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00976 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.27e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.0086 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc178d800 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0177 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00979 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00969 sec ] + [ GrB_select (hyper to sparse) + 0.00848 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00975 sec ] + [ GrB_select (hyper to sparse) + 0.00845 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dca00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0df0800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00846 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.76e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00965 sec ] + [ GrB_select (hyper to sparse) + 0.00847 sec ] + [ GrB_Matrix_nvals + 1.42e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1556e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1968700 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0176 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1559a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00958 sec ] + [ GrB_select (hyper to sparse) + 0.00892 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00955 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00913 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00949 sec ] + [ GrB_select (hyper to sparse) + 0.00911 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (hyper to sparse) + 0.00835 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0095 sec ] + [ GrB_select (hyper to sparse) + 0.0091 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc17dcb00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00835 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000108 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a0a00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00909 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00953 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0173 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 28082 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 77187 + vlen: 2003 nvec_nonempty: 1839 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 942516 shallow: 0 total: 942516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1551c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 617496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1a50300 shallow: 0 size: 308748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 77186 entries, memory: 920.4 KB + pending tuples: 0 max pending: 0 zombies: 16 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00944 sec ] + [ GrB_select (wait:A 16 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 2.16e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.94317e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48762 +bucket 2: 28018 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76781 + vlen: 2003 nvec_nonempty: 1813 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 937644 shallow: 0 total: 937644 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc154f500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 614248 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 307124 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76780 entries, memory: 915.7 KB + pending tuples: 0 max pending: 0 zombies: 6 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00935 sec ] + [ GrB_select (wait:A 6 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0178 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93367e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48680 +bucket 2: 27976 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76657 + vlen: 2003 nvec_nonempty: 1804 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 936156 shallow: 0 total: 936156 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c6200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 613256 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1a50300 shallow: 0 size: 306628 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76656 entries, memory: 914.2 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0163 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.93092e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48660 +bucket 2: 27960 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76621 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935724 shallow: 0 total: 935724 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1551900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612968 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306484 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76620 entries, memory: 913.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (hyper to sparse) + 0.00889 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92969e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48646 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76605 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935532 shallow: 0 total: 935532 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612840 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306420 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76604 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00931 sec ] + [ GrB_select (hyper to sparse) + 0.00817 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92939e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48642 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76601 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935484 shallow: 0 total: 935484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76600 entries, memory: 913.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00933 sec ] + [ GrB_select (hyper to sparse) + 0.00891 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7a00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a4900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00929 sec ] + [ GrB_select (hyper to sparse) + 0.00816 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.92923e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 48640 +bucket 2: 27958 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 76599 + vlen: 2003 nvec_nonempty: 1802 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 935460 shallow: 0 total: 935460 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 612792 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 306396 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 76598 entries, memory: 913.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00936 sec ] + [ GrB_select (hyper to sparse) + 0.0088 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.78437e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 47082 +bucket 2: 27598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 74681 + vlen: 2003 nvec_nonempty: 1767 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 912444 shallow: 0 total: 912444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 597448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 298724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 74680 entries, memory: 891.1 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00917 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.70046e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 46074 +bucket 2: 27472 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73547 + vlen: 2003 nvec_nonempty: 1701 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 898836 shallow: 0 total: 898836 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06a8200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 588376 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1b30a00 shallow: 0 size: 294188 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73546 entries, memory: 877.8 KB + pending tuples: 0 max pending: 0 zombies: 46 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00918 sec ] + [ GrB_select (wait:A 46 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0171 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67745e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45800 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73233 + vlen: 2003 nvec_nonempty: 1643 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 895068 shallow: 0 total: 895068 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585864 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292932 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73232 entries, memory: 874.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00908 sec ] + [ GrB_select (hyper to sparse) + 0.00857 sec ] + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67423e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45756 +bucket 2: 27432 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73189 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894540 shallow: 0 total: 894540 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585512 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292756 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73188 entries, memory: 873.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00905 sec ] + [ GrB_select (hyper to sparse) + 0.00785 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6735e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45748 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73179 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894420 shallow: 0 total: 894420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73178 entries, memory: 873.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (hyper to sparse) + 0.0086 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7b00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00904 sec ] + [ GrB_select (hyper to sparse) + 0.00785 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000111 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.67292e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45740 +bucket 2: 27430 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 73171 + vlen: 2003 nvec_nonempty: 1639 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 894324 shallow: 0 total: 894324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc06ac100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 585368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 292684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 73170 entries, memory: 873.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00909 sec ] + [ GrB_select (hyper to sparse) + 0.00868 sec ] + [ GrB_Matrix_nvals + 1.34e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.64363e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45482 +bucket 2: 27286 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72769 + vlen: 2003 nvec_nonempty: 1633 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 889500 shallow: 0 total: 889500 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 582152 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 291076 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72768 entries, memory: 868.7 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.009 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0155 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45228 +bucket 2: 27192 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72421 + vlen: 2003 nvec_nonempty: 1621 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 885324 shallow: 0 total: 885324 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 579368 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1c07100 shallow: 0 size: 289684 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72420 entries, memory: 864.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00901 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.6093e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45104 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72295 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 883812 shallow: 0 total: 883812 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578360 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289180 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72294 entries, memory: 863.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00896 sec ] + [ GrB_select (hyper to sparse) + 0.00777 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7c00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00898 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000116 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.60886e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 45098 +bucket 2: 27190 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 72289 + vlen: 2003 nvec_nonempty: 1613 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 883740 shallow: 0 total: 883740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f3c00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 578312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 289156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 72288 entries, memory: 863.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00902 sec ] + [ GrB_select (hyper to sparse) + 0.00769 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.51488e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 44110 +bucket 2: 26864 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70975 + vlen: 2003 nvec_nonempty: 1605 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 867972 shallow: 0 total: 867972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 567800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 283900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70974 entries, memory: 847.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 13 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00888 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0159 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.45627e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43418 +bucket 2: 26724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 70143 + vlen: 2003 nvec_nonempty: 1580 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 857988 shallow: 0 total: 857988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 561144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 280572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 70142 entries, memory: 837.9 KB + pending tuples: 0 max pending: 0 zombies: 22 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 12 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00889 sec ] + [ GrB_select (wait:A 22 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0158 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.41651e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 43012 +bucket 2: 26560 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69573 + vlen: 2003 nvec_nonempty: 1564 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 851148 shallow: 0 total: 851148 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 556584 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 278292 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69572 entries, memory: 831.2 KB + pending tuples: 0 max pending: 0 zombies: 4 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (wait:A 4 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.015 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.39834e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42848 +bucket 2: 26462 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69311 + vlen: 2003 nvec_nonempty: 1552 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 848004 shallow: 0 total: 848004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2e00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 554488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 277244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69310 entries, memory: 828.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 21 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 20 + row 167: 24 + row 168: 7 + row 172: 7 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00752 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.38438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42710 +bucket 2: 26398 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 69109 + vlen: 2003 nvec_nonempty: 1549 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 845580 shallow: 0 total: 845580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c2b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 552872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1cdae00 shallow: 0 size: 276436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 69108 entries, memory: 825.8 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 27 entries [0:26] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 24 + row 5: 22 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 6 + row 40: 6 + row 41: 6 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 22 + row 173: 6 + column: 1 : 23 entries [27:49] + row 0: 22 + row 2: 12 + ... + Pending (nil) + + 0.00879 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0164 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37487e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42606 +bucket 2: 26364 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68971 + vlen: 2003 nvec_nonempty: 1540 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 843924 shallow: 0 total: 843924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68970 entries, memory: 824.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00876 sec ] + [ GrB_select (hyper to sparse) + 0.00749 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7d00 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00876 sec ] + [ GrB_select (hyper to sparse) + 0.00828 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00013 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.37349e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 42590 +bucket 2: 26360 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 68951 + vlen: 2003 nvec_nonempty: 1539 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 843684 shallow: 0 total: 843684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 551608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 275804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 68950 entries, memory: 823.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0088 sec ] + [ GrB_select (hyper to sparse) + 0.00746 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.30569e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41946 +bucket 2: 26012 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 67959 + vlen: 2003 nvec_nonempty: 1533 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 831780 shallow: 0 total: 831780 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 543672 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 271836 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 67958 entries, memory: 812.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00869 sec ] + [ GrB_select (hyper to sparse) + 0.00807 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.23646e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41162 +bucket 2: 25768 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66931 + vlen: 2003 nvec_nonempty: 1506 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 819444 shallow: 0 total: 819444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 535448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc187b400 shallow: 0 size: 267724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66930 entries, memory: 800.2 KB + pending tuples: 0 max pending: 0 zombies: 10 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00863 sec ] + [ GrB_select (wait:A 10 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0145 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21965e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 41002 +bucket 2: 25676 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66679 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 816420 shallow: 0 total: 816420 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533432 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266716 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66678 entries, memory: 797.3 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00858 sec ] + [ GrB_select (hyper to sparse) + 0.00816 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21659e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40982 +bucket 2: 25650 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66633 + vlen: 2003 nvec_nonempty: 1491 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815868 shallow: 0 total: 815868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0500 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 533064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66632 entries, memory: 796.7 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00856 sec ] + [ GrB_select (hyper to sparse) + 0.00732 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21552e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40968 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66617 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815676 shallow: 0 total: 815676 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532936 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266468 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66616 entries, memory: 796.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00877 sec ] + [ GrB_select (hyper to sparse) + 0.00803 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21499e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40960 +bucket 2: 25648 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66609 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815580 shallow: 0 total: 815580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66608 entries, memory: 796.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00864 sec ] + [ GrB_select (hyper to sparse) + 0.0073 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.21446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40966 +bucket 2: 25634 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66601 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815484 shallow: 0 total: 815484 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532808 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266404 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66600 entries, memory: 796.4 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.213e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40972 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66579 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 815220 shallow: 0 total: 815220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0400 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66578 entries, memory: 796.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00855 sec ] + [ GrB_select (hyper to sparse) + 0.0073 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.2114e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40948 +bucket 2: 25606 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66555 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 814932 shallow: 0 total: 814932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0300 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66554 entries, memory: 795.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40928 +bucket 2: 25598 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66527 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 814596 shallow: 0 total: 814596 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0300 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 532216 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 266108 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66526 entries, memory: 795.5 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00862 sec ] + [ GrB_select (hyper to sparse) + 0.00727 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20582e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40918 +bucket 2: 25552 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66471 + vlen: 2003 nvec_nonempty: 1490 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 813924 shallow: 0 total: 813924 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0200 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531768 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265884 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66470 entries, memory: 794.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00884 sec ] + [ GrB_select (hyper to sparse) + 0.00801 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.20184e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40880 +bucket 2: 25530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66411 + vlen: 2003 nvec_nonempty: 1489 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 813204 shallow: 0 total: 813204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66410 entries, memory: 794.1 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00883 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19998e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40876 +bucket 2: 25506 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66383 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812868 shallow: 0 total: 812868 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0100 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 531064 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265532 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66382 entries, memory: 793.8 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00861 sec ] + [ GrB_select (hyper to sparse) + 0.00801 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19853e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40868 +bucket 2: 25492 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66361 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812604 shallow: 0 total: 812604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66360 entries, memory: 793.6 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00884 sec ] + [ GrB_select (hyper to sparse) + 0.00722 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19641e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40850 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66329 + vlen: 2003 nvec_nonempty: 1488 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 812220 shallow: 0 total: 812220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66328 entries, memory: 793.2 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.008 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7e00 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.0086 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.29e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.19495e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 40828 +bucket 2: 25478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 66307 + vlen: 2003 nvec_nonempty: 1487 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 811956 shallow: 0 total: 811956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 530456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 265228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 66306 entries, memory: 792.9 KB + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 11 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 17 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00857 sec ] + [ GrB_select (hyper to sparse) + 0.00716 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.88498e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 36722 +bucket 2: 24724 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 61447 + vlen: 2003 nvec_nonempty: 1387 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 753636 shallow: 0 total: 753636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 491576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1da4f00 shallow: 0 size: 245788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 61446 entries, memory: 736.0 KB + pending tuples: 0 max pending: 0 zombies: 14 + + column: 0 : 23 entries [0:22] + row 1: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + row 7: 22 + row 8: 13 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 158: 10 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 16 + row 163: 18 + row 164: 11 + row 165: 16 + row 166: 16 + row 167: 18 + column: 1 : 23 entries [23:45] + row 0: 22 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 22 + ... + Pending (nil) + + 0.00832 sec ] + [ GrB_select (wait:A 14 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0134 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.77056e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 35084 +bucket 2: 24468 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59553 + vlen: 2003 nvec_nonempty: 1198 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 730908 shallow: 0 total: 730908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 476424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc150af00 shallow: 0 size: 238212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59552 entries, memory: 713.8 KB + + column: 0 : 22 entries [0:21] + row 1: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 19 + row 157: 19 + row 159: 17 + row 160: 17 + row 161: 18 + row 162: 16 + row 163: 17 + row 164: 10 + row 165: 15 + row 166: 15 + row 167: 18 + column: 1 : 22 entries [22:43] + row 0: 21 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 21 + row 7: 21 + ... + Pending (nil) + + 0.0081 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.75218e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34876 +bucket 2: 24366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59243 + vlen: 2003 nvec_nonempty: 1175 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 727188 shallow: 0 total: 727188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492600 shallow: 0 size: 236972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59242 entries, memory: 710.1 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00806 sec ] + [ GrB_select (hyper to sparse) + 0.00744 sec ] + [ GrB_Matrix_nvals + 1.64e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a7f00 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492400 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00808 sec ] + [ GrB_select (hyper to sparse) + 0.00659 sec ] + [ GrB_Matrix_nvals + 1.71e-07 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.87e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.74887e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34828 +bucket 2: 24358 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 59187 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 726516 shallow: 0 total: 726516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c0000 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 473496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1492400 shallow: 0 size: 236748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 59186 entries, memory: 709.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00807 sec ] + [ GrB_select (hyper to sparse) + 0.00731 sec ] + [ GrB_Matrix_nvals + 1.94e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.72954e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34622 +bucket 2: 24236 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58859 + vlen: 2003 nvec_nonempty: 1170 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 722580 shallow: 0 total: 722580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 470872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1491a00 shallow: 0 size: 235436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58858 entries, memory: 705.6 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00811 sec ] + [ GrB_select (hyper to sparse) + 0.00653 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.71055e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34400 +bucket 2: 24134 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58535 + vlen: 2003 nvec_nonempty: 1166 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 718692 shallow: 0 total: 718692 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 468280 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1491000 shallow: 0 size: 234140 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58534 entries, memory: 701.8 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00728 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.69713e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34242 +bucket 2: 24062 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58305 + vlen: 2003 nvec_nonempty: 1165 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 715932 shallow: 0 total: 715932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 466440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490900 shallow: 0 size: 233220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58304 entries, memory: 699.2 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00802 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.68771e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34156 +bucket 2: 23986 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 58143 + vlen: 2003 nvec_nonempty: 1163 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 713988 shallow: 0 total: 713988 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 465144 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1490300 shallow: 0 size: 232572 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 58142 entries, memory: 697.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00804 sec ] + [ GrB_select (hyper to sparse) + 0.00725 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.67635e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 34032 +bucket 2: 23914 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57947 + vlen: 2003 nvec_nonempty: 1161 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 711636 shallow: 0 total: 711636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 463576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148fd00 shallow: 0 size: 231788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57946 entries, memory: 695.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00647 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66884e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33960 +bucket 2: 23856 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57817 + vlen: 2003 nvec_nonempty: 1156 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 710076 shallow: 0 total: 710076 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462536 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f900 shallow: 0 size: 231268 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57816 entries, memory: 693.4 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00723 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66642e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33938 +bucket 2: 23836 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57775 + vlen: 2003 nvec_nonempty: 1154 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 709572 shallow: 0 total: 709572 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 462200 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f800 shallow: 0 size: 231100 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57774 entries, memory: 692.9 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (hyper to sparse) + 0.00645 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66446e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23806 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57741 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 709164 shallow: 0 total: 709164 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461928 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f700 shallow: 0 size: 230964 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57740 entries, memory: 692.5 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00796 sec ] + [ GrB_select (hyper to sparse) + 0.00721 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66307e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33934 +bucket 2: 23782 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57717 + vlen: 2003 nvec_nonempty: 1153 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 708876 shallow: 0 total: 708876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57716 entries, memory: 692.3 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00797 sec ] + [ GrB_select (hyper to sparse) + 0.00644 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a8000 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00795 sec ] + [ GrB_select (hyper to sparse) + 0.00719 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.66192e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33918 +bucket 2: 23778 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 57697 + vlen: 2003 nvec_nonempty: 1152 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 708636 shallow: 0 total: 708636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc18c3f00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 461576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148f600 shallow: 0 size: 230788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 57696 entries, memory: 692.0 KB + + column: 0 : 21 entries [0:20] + row 1: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + row 9: 20 + row 10: 20 + row 11: 18 + row 156: 18 + row 157: 18 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 16 + row 163: 16 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 21 entries [21:41] + row 0: 20 + row 2: 12 + row 3: 20 + row 4: 20 + row 5: 18 + row 6: 20 + row 7: 20 + row 8: 12 + ... + Pending (nil) + + 0.00805 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.61411e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 33340 +bucket 2: 23520 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56861 + vlen: 2003 nvec_nonempty: 1145 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 698604 shallow: 0 total: 698604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 454888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148db00 shallow: 0 size: 227444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56860 entries, memory: 682.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0079 sec ] + [ GrB_select (hyper to sparse) + 0.00808 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.57629e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32886 +bucket 2: 23304 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 56191 + vlen: 2003 nvec_nonempty: 1126 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 690564 shallow: 0 total: 690564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 449528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148c600 shallow: 0 size: 224764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 56190 entries, memory: 674.4 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00777 sec ] + [ GrB_select (hyper to sparse) + 0.00804 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.55449e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32744 +bucket 2: 23056 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55801 + vlen: 2003 nvec_nonempty: 1115 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 685884 shallow: 0 total: 685884 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 446408 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148ba00 shallow: 0 size: 223204 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55800 entries, memory: 669.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0078 sec ] + [ GrB_select (hyper to sparse) + 0.00623 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.54381e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32734 +bucket 2: 22874 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55609 + vlen: 2003 nvec_nonempty: 1112 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 683580 shallow: 0 total: 683580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 444872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b400 shallow: 0 size: 222436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55608 entries, memory: 667.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00783 sec ] + [ GrB_select (hyper to sparse) + 0.00697 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53737e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32638 +bucket 2: 22854 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55493 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 682188 shallow: 0 total: 682188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55492 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00783 sec ] + [ GrB_select (hyper to sparse) + 0.00624 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd700 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00783 sec ] + [ GrB_select (hyper to sparse) + 0.00699 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000119 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.53715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32642 +bucket 2: 22846 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 55489 + vlen: 2003 nvec_nonempty: 1106 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 682140 shallow: 0 total: 682140 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02f7b00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 443912 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc148b100 shallow: 0 size: 221956 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 55488 entries, memory: 666.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00788 sec ] + [ GrB_select (hyper to sparse) + 0.00622 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.50245e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 32174 +bucket 2: 22684 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54859 + vlen: 2003 nvec_nonempty: 1100 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 674580 shallow: 0 total: 674580 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 438872 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1489d00 shallow: 0 size: 219436 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54858 entries, memory: 658.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00777 sec ] + [ GrB_select (hyper to sparse) + 0.0069 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.45808e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31858 +bucket 2: 22184 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 54043 + vlen: 2003 nvec_nonempty: 1091 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 664788 shallow: 0 total: 664788 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 432344 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1488300 shallow: 0 size: 216172 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 54042 entries, memory: 649.2 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.0077 sec ] + [ GrB_select (hyper to sparse) + 0.00611 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.42407e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31586 +bucket 2: 21822 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53409 + vlen: 2003 nvec_nonempty: 1084 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 657180 shallow: 0 total: 657180 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 427272 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1487000 shallow: 0 size: 213636 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53408 entries, memory: 641.8 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00762 sec ] + [ GrB_select (hyper to sparse) + 0.00682 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd800 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486a00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00763 sec ] + [ GrB_select (hyper to sparse) + 0.00609 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.08e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.41438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31644 +bucket 2: 21582 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 53227 + vlen: 2003 nvec_nonempty: 1072 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 654996 shallow: 0 total: 654996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02fba00 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 425816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1486a00 shallow: 0 size: 212908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 53226 entries, memory: 639.6 KB + + column: 0 : 19 entries [0:18] + row 1: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + row 157: 16 + row 159: 16 + row 160: 16 + row 161: 18 + row 162: 14 + row 163: 14 + row 165: 14 + row 166: 14 + row 167: 18 + column: 1 : 19 entries [19:37] + row 0: 18 + row 3: 18 + row 4: 18 + row 5: 18 + row 6: 18 + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 156: 16 + ... + Pending (nil) + + 0.00757 sec ] + [ GrB_select (hyper to sparse) + 0.00671 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.35413e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 31126 +bucket 2: 20954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 52081 + vlen: 2003 nvec_nonempty: 1060 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 641244 shallow: 0 total: 641244 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 416648 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484600 shallow: 0 size: 208324 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 52080 entries, memory: 626.2 KB + + column: 0 : 15 entries [0:14] + row 1: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + row 167: 14 + column: 1 : 15 entries [15:29] + row 0: 14 + row 3: 14 + row 4: 14 + row 5: 14 + row 6: 14 + row 7: 14 + row 9: 14 + row 10: 14 + row 11: 14 + row 156: 14 + row 157: 14 + row 159: 14 + row 160: 14 + row 161: 14 + ... + Pending (nil) + + 0.00749 sec ] + [ GrB_select (hyper to sparse) + 0.00658 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.289e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30586 +bucket 2: 20226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50813 + vlen: 2003 nvec_nonempty: 1029 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 626028 shallow: 0 total: 626028 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 406504 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 203252 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50812 entries, memory: 611.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00745 sec ] + [ GrB_select (hyper to sparse) + 0.0058 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.25563e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 30158 +bucket 2: 19992 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 50151 + vlen: 2003 nvec_nonempty: 1016 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 618084 shallow: 0 total: 618084 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 401208 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480a00 shallow: 0 size: 200604 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 50150 entries, memory: 603.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00739 sec ] + [ GrB_select (hyper to sparse) + 0.00652 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.24304e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29956 +bucket 2: 19942 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49899 + vlen: 2003 nvec_nonempty: 1010 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 615060 shallow: 0 total: 615060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 399192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1480200 shallow: 0 size: 199596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49898 entries, memory: 600.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00736 sec ] + [ GrB_select (hyper to sparse) + 0.00578 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.23389e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29796 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49715 + vlen: 2003 nvec_nonempty: 1006 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 612852 shallow: 0 total: 612852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49714 entries, memory: 598.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00731 sec ] + [ GrB_select (hyper to sparse) + 0.00657 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fd900 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00732 sec ] + [ GrB_select (hyper to sparse) + 0.00577 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000117 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.233e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29778 +bucket 2: 19918 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 49697 + vlen: 2003 nvec_nonempty: 1005 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdc00 number of memory blocks: 4 + deep: 612636 shallow: 0 total: 612636 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc02ff900 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 397576 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147fc00 shallow: 0 size: 198788 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 49696 entries, memory: 598.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00738 sec ] + [ GrB_select (hyper to sparse) + 0.00577 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.18699e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 29308 +bucket 2: 19452 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 48761 + vlen: 2003 nvec_nonempty: 984 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 601404 shallow: 0 total: 601404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 390088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147de00 shallow: 0 size: 195044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 48760 entries, memory: 587.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00602 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13766e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28540 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47737 + vlen: 2003 nvec_nonempty: 937 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 589116 shallow: 0 total: 589116 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 381896 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147be00 shallow: 0 size: 190948 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47736 entries, memory: 575.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00724 sec ] + [ GrB_select (hyper to sparse) + 0.00558 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13337e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28450 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47647 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 588036 shallow: 0 total: 588036 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 381176 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147bb00 shallow: 0 size: 190588 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47646 entries, memory: 574.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00723 sec ] + [ GrB_select (hyper to sparse) + 0.00637 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.13052e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28390 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47587 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 587316 shallow: 0 total: 587316 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 380696 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147ba00 shallow: 0 size: 190348 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47586 entries, memory: 573.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00723 sec ] + [ GrB_select (hyper to sparse) + 0.00561 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fda00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147b600 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00721 sec ] + [ GrB_select (hyper to sparse) + 0.00632 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.12454e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 28264 +bucket 2: 19196 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 47461 + vlen: 2003 nvec_nonempty: 936 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdd00 number of memory blocks: 4 + deep: 585804 shallow: 0 total: 585804 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0303800 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 379688 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147b600 shallow: 0 size: 189844 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 47460 entries, memory: 572.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 19 entries [0:18] + row 7: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + row 151: 17 + row 153: 17 + row 154: 16 + row 155: 18 + row 156: 17 + row 157: 17 + row 159: 18 + row 160: 18 + row 161: 18 + column: 7 : 19 entries [19:37] + row 6: 18 + row 9: 18 + row 10: 18 + row 11: 18 + row 12: 18 + row 13: 18 + row 15: 18 + row 16: 18 + row 17: 18 + row 150: 18 + ... + Pending (nil) + + 0.00727 sec ] + [ GrB_select (hyper to sparse) + 0.0055 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.08267e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 27588 +bucket 2: 18980 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 46569 + vlen: 2003 nvec_nonempty: 914 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 575100 shallow: 0 total: 575100 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 372552 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479a00 shallow: 0 size: 186276 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 46568 entries, memory: 561.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 18 entries [0:17] + row 7: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + row 153: 16 + row 155: 17 + row 156: 17 + row 157: 16 + row 159: 17 + row 160: 17 + row 161: 17 + column: 7 : 18 entries [18:35] + row 6: 17 + row 9: 17 + row 10: 17 + row 11: 17 + row 12: 17 + row 13: 17 + row 15: 17 + row 16: 17 + row 17: 17 + row 150: 17 + row 151: 17 + ... + Pending (nil) + + 0.00712 sec ] + [ GrB_select (hyper to sparse) + 0.00541 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.02496e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26894 +bucket 2: 18416 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 45311 + vlen: 2003 nvec_nonempty: 858 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 560004 shallow: 0 total: 560004 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 362488 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1477200 shallow: 0 size: 181244 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 45310 entries, memory: 546.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 16 entries [0:15] + row 7: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + row 159: 15 + row 160: 15 + row 161: 15 + column: 7 : 16 entries [16:31] + row 6: 15 + row 9: 15 + row 10: 15 + row 11: 15 + row 12: 15 + row 13: 15 + row 15: 15 + row 16: 15 + row 17: 15 + row 150: 15 + row 151: 15 + row 155: 15 + row 156: 15 + ... + Pending (nil) + + 0.00702 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00229e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26438 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44807 + vlen: 2003 nvec_nonempty: 834 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 553956 shallow: 0 total: 553956 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358456 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476300 shallow: 0 size: 179228 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44806 entries, memory: 541.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00608 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdb00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00687 sec ] + [ GrB_select (hyper to sparse) + 0.00611 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.14e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:1.00041e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26396 +bucket 2: 18368 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44765 + vlen: 2003 nvec_nonempty: 827 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 553452 shallow: 0 total: 553452 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0307700 shallow: 0 size: 16032 + ->i: 0x7effc141ea00 shallow: 0 size: 358120 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 179060 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44764 entries, memory: 540.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0069 sec ] + [ GrB_select (hyper to sparse) + 0.00541 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdc00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc141ea00 shallow: 0 size: 16032 + ->i: 0x7effc1422900 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479700 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00692 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000133 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc141ea00 shallow: 0 size: 16032 + ->i: 0x7effc1422900 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1479700 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00703 sec ] + [ GrB_select (hyper to sparse) + 0.00529 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000104 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987665 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26354 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44479 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 550020 shallow: 0 total: 550020 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1422900 shallow: 0 size: 16032 + ->i: 0x7effc1426800 shallow: 0 size: 355832 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147d600 shallow: 0 size: 177916 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44478 entries, memory: 537.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00691 sec ] + [ GrB_select (hyper to sparse) + 0.00604 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:987221 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26344 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44469 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 549900 shallow: 0 total: 549900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481500 shallow: 0 size: 177876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44468 entries, memory: 537.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00686 sec ] + [ GrB_select (hyper to sparse) + 0.00606 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fde00 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481400 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00684 sec ] + [ GrB_select (hyper to sparse) + 0.0053 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000115 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:985534 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26306 +bucket 2: 18124 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44431 + vlen: 2003 nvec_nonempty: 816 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 549444 shallow: 0 total: 549444 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1426800 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 355448 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481400 shallow: 0 size: 177724 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44430 entries, memory: 536.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00689 sec ] + [ GrB_select (hyper to sparse) + 0.00605 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:975092 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 26240 +bucket 2: 17954 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 44195 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 546612 shallow: 0 total: 546612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 353560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1484c00 shallow: 0 size: 176780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 44194 entries, memory: 533.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00681 sec ] + [ GrB_select (hyper to sparse) + 0.00523 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:954375 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25948 +bucket 2: 17774 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43723 + vlen: 2003 nvec_nonempty: 815 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 540948 shallow: 0 total: 540948 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 349784 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1483d00 shallow: 0 size: 174892 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43722 entries, memory: 528.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00677 sec ] + [ GrB_select (hyper to sparse) + 0.00594 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:929136 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25466 +bucket 2: 17674 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43141 + vlen: 2003 nvec_nonempty: 809 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 533964 shallow: 0 total: 533964 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 345128 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1482b00 shallow: 0 size: 172564 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43140 entries, memory: 521.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00668 sec ] + [ GrB_select (hyper to sparse) + 0.00515 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:923974 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25394 +bucket 2: 17626 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 43021 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 532524 shallow: 0 total: 532524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 344168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1482700 shallow: 0 size: 172084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 43020 entries, memory: 520.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0067 sec ] + [ GrB_select (hyper to sparse) + 0.00526 sec ] + [ GrB_Matrix_nvals + 1.79e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:912071 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25212 +bucket 2: 17530 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42743 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 529188 shallow: 0 total: 529188 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341944 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 170972 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42742 entries, memory: 516.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00663 sec ] + [ GrB_select (hyper to sparse) + 0.00554 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911303 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17464 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42725 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 528972 shallow: 0 total: 528972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481e00 shallow: 0 size: 170900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42724 entries, memory: 516.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00667 sec ] + [ GrB_select (hyper to sparse) + 0.00517 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fdf00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00666 sec ] + [ GrB_select (hyper to sparse) + 0.00591 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.89e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:911047 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25260 +bucket 2: 17458 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 42719 + vlen: 2003 nvec_nonempty: 808 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556a00 number of memory blocks: 4 + deep: 528900 shallow: 0 total: 528900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 341752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 170876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 42718 entries, memory: 516.5 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00671 sec ] + [ GrB_select (hyper to sparse) + 0.00502 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:825948 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25120 +bucket 2: 15554 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 40675 + vlen: 2003 nvec_nonempty: 789 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 504372 shallow: 0 total: 504372 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142e600 shallow: 0 size: 16032 + ->i: 0x7effc1432500 shallow: 0 size: 325400 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1481d00 shallow: 0 size: 162700 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 40674 entries, memory: 492.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00654 sec ] + [ GrB_select (hyper to sparse) + 0.00541 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:672510 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23224 +bucket 2: 13478 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 36703 + vlen: 2003 nvec_nonempty: 736 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 456708 shallow: 0 total: 456708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc142a700 shallow: 0 size: 16032 + ->i: 0x7effc142e600 shallow: 0 size: 293624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1476100 shallow: 0 size: 146812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 36702 entries, memory: 446.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00626 sec ] + [ GrB_select (hyper to sparse) + 0.00453 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:629110 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23236 +bucket 2: 12262 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35499 + vlen: 2003 nvec_nonempty: 698 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 442260 shallow: 0 total: 442260 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1508400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 283992 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146fd00 shallow: 0 size: 141996 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35498 entries, memory: 431.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00615 sec ] + [ GrB_select (hyper to sparse) + 0.00487 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:619084 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23468 +bucket 2: 11746 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35215 + vlen: 2003 nvec_nonempty: 696 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 438852 shallow: 0 total: 438852 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fe400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 281720 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146f400 shallow: 0 size: 140860 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35214 entries, memory: 428.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0062 sec ] + [ GrB_select (hyper to sparse) + 0.00444 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:612282 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23654 +bucket 2: 11366 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 35021 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 436524 shallow: 0 total: 436524 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fcb00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 280168 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc146ee00 shallow: 0 size: 140084 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 35020 entries, memory: 426.3 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00613 sec ] + [ GrB_select (hyper to sparse) + 0.00522 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:611024 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11282 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34985 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 436092 shallow: 0 total: 436092 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fbb00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279880 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ffa00 shallow: 0 size: 139940 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34984 entries, memory: 425.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.0061 sec ] + [ GrB_select (hyper to sparse) + 0.00479 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe000 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb700 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ff600 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00614 sec ] + [ GrB_select (hyper to sparse) + 0.00513 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.58e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:610395 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 23702 +bucket 2: 11264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 34967 + vlen: 2003 nvec_nonempty: 693 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 435876 shallow: 0 total: 435876 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb500 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 279736 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14ff400 shallow: 0 size: 139868 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 34966 entries, memory: 425.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00612 sec ] + [ GrB_select (hyper to sparse) + 0.00473 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:418019 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 24542 +bucket 2: 4394 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 28937 + vlen: 2003 nvec_nonempty: 630 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 363516 shallow: 0 total: 363516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ff400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 231496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 115748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 28936 entries, memory: 355.0 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00586 sec ] + [ GrB_select (hyper to sparse) + 0.00377 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:328878 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25594 +bucket 2: 72 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25667 + vlen: 2003 nvec_nonempty: 579 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 324276 shallow: 0 total: 324276 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ff400 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 205336 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1503300 shallow: 0 size: 102668 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25666 entries, memory: 316.7 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00458 sec ] + [ GrB_select (hyper to sparse) + 0.00363 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc15fe100 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ab500 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00414 sec ] + [ GrB_select (hyper to sparse) + 0.0036 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14a9100 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00413 sec ] + [ GrB_select (hyper to sparse) + 0.00435 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.57e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:314884 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 25114 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 25115 + vlen: 2003 nvec_nonempty: 528 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556d00 number of memory blocks: 4 + deep: 317652 shallow: 0 total: 317652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ad000 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 200920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 100460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 25114 entries, memory: 310.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00417 sec ] + [ GrB_select (hyper to sparse) + 0.00311 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:151605 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 17426 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 17427 + vlen: 2003 nvec_nonempty: 480 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 225396 shallow: 0 total: 225396 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14fb500 shallow: 0 size: 16032 + ->i: 0x7effc14ff400 shallow: 0 size: 139416 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 69708 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 17426 entries, memory: 220.1 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00347 sec ] + [ GrB_select (hyper to sparse) + 0.00265 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106625 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14614 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14615 + vlen: 2003 nvec_nonempty: 336 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc148c700 number of memory blocks: 4 + deep: 191652 shallow: 0 total: 191652 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc14ad000 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116920 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58460 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14614 entries, memory: 187.2 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00323 sec ] + [ GrB_select (hyper to sparse) + 0.00262 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556b00 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1480100 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00347 sec ] + [ GrB_select (hyper to sparse) + 0.00255 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.25e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:106275 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 14590 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 14591 + vlen: 2003 nvec_nonempty: 334 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556e00 number of memory blocks: 4 + deep: 191364 shallow: 0 total: 191364 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 116728 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc142a700 shallow: 0 size: 58364 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 14590 entries, memory: 186.9 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00316 sec ] + [ GrB_select (hyper to sparse) + 0.00255 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:88339.1 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 13302 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 13303 + vlen: 2003 nvec_nonempty: 327 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 175908 shallow: 0 total: 175908 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc1483e00 shallow: 0 size: 16032 + ->i: 0x7effc14fb500 shallow: 0 size: 106424 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc1515500 shallow: 0 size: 53212 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 13302 entries, memory: 171.8 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00303 sec ] + [ GrB_select (hyper to sparse) + 0.00288 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:77211.2 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12436 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12437 + vlen: 2003 nvec_nonempty: 314 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1454a00 number of memory blocks: 4 + deep: 165516 shallow: 0 total: 165516 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 99496 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49748 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12436 entries, memory: 161.6 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00302 sec ] + [ GrB_select (hyper to sparse) + 0.00315 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556c00 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00291 sec ] + [ GrB_select (hyper to sparse) + 0.00248 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.52e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:75949.9 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 12334 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 12335 + vlen: 2003 nvec_nonempty: 308 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc1556f00 number of memory blocks: 4 + deep: 164292 shallow: 0 total: 164292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc147ff00 shallow: 0 size: 16032 + ->i: 0x7effc142a700 shallow: 0 size: 98680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc14fb500 shallow: 0 size: 49340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 12334 entries, memory: 160.4 KB + + column: 0 : 0 entries [0:-1] + column: 1 : 0 entries [0:-1] + column: 2 : 0 entries [0:-1] + column: 3 : 0 entries [0:-1] + column: 4 : 0 entries [0:-1] + column: 5 : 0 entries [0:-1] + column: 6 : 0 entries [0:-1] + column: 7 : 0 entries [0:-1] + column: 8 : 0 entries [0:-1] + column: 9 : 0 entries [0:-1] + column: 10 : 0 entries [0:-1] + ... + Pending (nil) + + 0.00299 sec ] + [ GrB_select + 0.00142 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:66277 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 3668 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 3669 + vlen: 2003 nvec_nonempty: 203 nvec: 203 plen: 203 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc014fa00 number of memory blocks: 5 + deep: 47524 shallow: 0 total: 47524 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc030b600 shallow: 0 size: 1624 + ->p: 0x7effc06b0700 shallow: 0 size: 1632 + ->i: 0x7effc142a700 shallow: 0 size: 29352 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc147ff00 shallow: 0 size: 14676 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 3668 entries, memory: 46.4 KB + + column: 933 : 13 entries [0:12] + row 934: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 934 : 13 entries [13:25] + row 933: 12 + row 935: 12 + row 936: 12 + row 937: 12 + row 938: 12 + row 939: 12 + row 1031: 12 + row 1032: 12 + row 1033: 12 + row 1034: 12 + row 1038: 12 + row 1039: 12 + row 1040: 12 + column: 935 : 13 entries [26:38] + row 933: 12 + row 934: 12 + row 936: 12 + ... + Pending (nil) + + 0.00166 sec ] + [ GrB_select + 0.00102 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:1512 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 252 +bucket 2: 0 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, hypersparse by col, ints: 64/64 + max # entries: 253 + vlen: 2003 nvec_nonempty: 42 nvec: 42 plen: 42 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc06b0100 number of memory blocks: 5 + deep: 3956 shallow: 0 total: 3956 + GraphBLAS Type: uint32_t size: 4 + ->h: 0x7effc1557200 shallow: 0 size: 336 + ->p: 0x7effc1557000 shallow: 0 size: 344 + ->i: 0x7effc06ff400 shallow: 0 size: 2024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc030b600 shallow: 0 size: 1012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 252 entries, memory: 3.9 KB + + column: 1031 : 6 entries [0:5] + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1032 : 6 entries [6:11] + row 1031: 5 + row 1033: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1033 : 6 entries [12:17] + row 1031: 5 + row 1032: 5 + row 1034: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1034 : 6 entries [18:23] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1038: 5 + row 1039: 5 + row 1040: 5 + column: 1038 : 6 entries [24:29] + row 1031: 5 + row 1032: 5 + row 1033: 5 + row 1034: 5 + row 1039: 5 + ... + Pending (nil) + + 0.00151 sec ] + [ GrB_select C is empty, iso 0 + + 0.000451 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (H{H} = H'*H) work:0 GPUs:0 nthreads 1 ntasks 0 + 0.000172 sec ] + [ GrB_select C is empty, iso 0 + + 0.000387 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.43e-06 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0019 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000123 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00161 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000106 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00165 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000105 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00158 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000105 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00167 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000131 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00161 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:76598 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000108 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00155 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:73170 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.52e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00154 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:72288 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.6e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00152 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:68950 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.9e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00149 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:66306 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.00011 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00184 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:59186 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.06e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00179 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:57696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000107 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00171 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:55488 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.64e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00169 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:53226 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.53e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00159 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:49696 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.87e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00151 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:47460 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.69e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00148 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:44764 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.24e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00147 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.53e-05 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0015 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:44478 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.35e-05 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.0014 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:44430 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.55e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00143 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:42718 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.52e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 5.96e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00128 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:34966 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.73e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000996 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 6.54e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 5.22e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000979 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_reduce work:25114 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.4e-05 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_Matrix_nvals + 7.45e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.00075 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:14590 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 7.83e-05 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=S.*S) + 0.000724 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_reduce work:12334 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.33e-05 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_Matrix_nvals + 6.71e-08 sec ] + [ GrB_eWiseMult emult:(S<.>=H.*H) + 0.000247 sec ] + [ GrB_Matrix_nvals + 8.2e-08 sec ] + [ GrB_reduce work:0 gpus:0 + 4.22e-06 sec ] +[ OK ] +SUCCESS: All unit tests have passed. diff --git a/save_errors/o5 b/save_errors/o5 new file mode 100644 index 0000000000..0efe29634e --- /dev/null +++ b/save_errors/o5 @@ -0,0 +1,1781 @@ +Test allktruss... GB_cuda_get_device_count: 4, cudaError_t: 0 + +Device: 0: memory: 17071800320 SMs: 56 compute: 6.0 +GB_cuda_init: 0 + +================================== bcsstk13.mtx: + [ GrB_Matrix_build_FP64 (cast J 1 0) (step1: 0.00223121 sec) (step2: 0.020612 sec) (build, 1 threads) (step3: 0.000836842 sec) (step4: 0.000639699 sec) (jit: cpu load) (step5: 0.00099957 sec) (build 32/32 time: 0.0253694) (hyper to sparse) (wrapup 64/64 time: 0.00322783) (convert ints 32/32 to 64/64, time: 0.000659265) + 0.0293 sec ] + [ GxB_Vector_diag (jit: cuda load) (sparse to hyper) (sparse to full) + 0.0059 sec ] + [ GrB_Vector_nvals + 1.38e-06 sec ] +graph has 2003 self edges + [ GrB_select (jit: cuda load) + 0.00713 sec ] +now has 0 self edges + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34715e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) (jit: cuda load) +zombies: 0 +bucket 1: 53444 +bucket 2: 28436 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81881 + vlen: 2003 nvec_nonempty: 2003 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 998844 shallow: 0 total: 998844 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc01fba00 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 655048 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc01ff900 shallow: 0 size: 327524 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81880 entries, memory: 975.4 KB + pending tuples: 0 max pending: 0 zombies: 72 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0103 sec ] + [ GrB_select (wait:A 72 zombies, 0 pending) (jit: cuda load) (hyper to sparse) (jit: cuda load) (hyper to sparse) + 0.0187 sec ] + [ GrB_Matrix_nvals + 1.86e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024f800 shallow: 0 size: 16032 + ->i: 0x7effc0148000 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00912 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81808 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks)(jit: cuda load) + 0.000323 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.34126e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53374 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81809 + vlen: 2003 nvec_nonempty: 1999 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc024b900 number of memory blocks: 4 + deep: 997980 shallow: 0 total: 997980 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654472 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc014bf00 shallow: 0 size: 327236 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81808 entries, memory: 974.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0105 sec ] + [ GrB_select (hyper to sparse) + 0.00868 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc024b900 shallow: 0 size: 16032 + ->i: 0x7effc0008200 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0148000 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00966 sec ] + [ GrB_select (hyper to sparse) + 0.00852 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81760 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 8.22e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.33734e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 53326 +bucket 2: 28434 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81761 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 997404 shallow: 0 total: 997404 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0004300 shallow: 0 size: 654088 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 327044 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81760 entries, memory: 974.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.00954 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.296e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52838 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81253 + vlen: 2003 nvec_nonempty: 1989 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 991308 shallow: 0 total: 991308 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0393000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 650024 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc02f3c00 shallow: 0 size: 325012 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81252 entries, memory: 968.1 KB + pending tuples: 0 max pending: 0 zombies: 88 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00971 sec ] + [ GrB_select (wait:A 88 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0182 sec ] + [ GrB_Matrix_nvals + 1.27e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28789e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52738 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81153 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990108 shallow: 0 total: 990108 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649224 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324612 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81152 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00961 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc01e7e00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0148000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0057600 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00976 sec ] + [ GrB_select (hyper to sparse) + 0.00849 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:81148 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.87e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.28757e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52734 +bucket 2: 28414 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81149 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 990060 shallow: 0 total: 990060 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 649192 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324596 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81148 entries, memory: 966.9 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (hyper to sparse) + 0.0093 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27866e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52638 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81039 + vlen: 2003 nvec_nonempty: 1946 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988740 shallow: 0 total: 988740 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648312 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc0053700 shallow: 0 size: 324156 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81038 entries, memory: 965.6 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.017 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52600 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 81001 + vlen: 2003 nvec_nonempty: 1945 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 988284 shallow: 0 total: 988284 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 648008 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 324004 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 81000 entries, memory: 965.1 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.0097 sec ] + [ GrB_select (hyper to sparse) + 0.00928 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.273e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52568 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80969 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987900 shallow: 0 total: 987900 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647752 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323876 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80968 entries, memory: 964.7 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00964 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27171e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52552 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80953 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987708 shallow: 0 total: 987708 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647624 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323812 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80952 entries, memory: 964.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00984 sec ] + [ GrB_select (hyper to sparse) + 0.00922 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27106e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52544 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80945 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987612 shallow: 0 total: 987612 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647560 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323780 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80944 entries, memory: 964.5 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00963 sec ] + [ GrB_select (hyper to sparse) + 0.00851 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253a00 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc00a2800 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00962 sec ] + [ GrB_select (hyper to sparse) + 0.00925 sec ] + [ GrB_Matrix_nvals + 8.94e-08 sec ] + [ GrB_reduce work:80940 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 9.52e-05 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.27074e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 52540 +bucket 2: 28400 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80941 + vlen: 2003 nvec_nonempty: 1944 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6700 number of memory blocks: 4 + deep: 987564 shallow: 0 total: 987564 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 647528 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 323764 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80940 entries, memory: 964.4 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00968 sec ] + [ GrB_select (hyper to sparse) + 0.0085 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.20416e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51834 +bucket 2: 28278 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 80113 + vlen: 2003 nvec_nonempty: 1935 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 977628 shallow: 0 total: 977628 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 640904 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 320452 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 80112 entries, memory: 954.7 KB + pending tuples: 0 max pending: 0 zombies: 36 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00957 sec ] + [ GrB_select (wait:A 36 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 9.69e-08 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.17559e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51490 +bucket 2: 28264 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79755 + vlen: 2003 nvec_nonempty: 1926 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 973332 shallow: 0 total: 973332 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 638040 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc08c2f00 shallow: 0 size: 319020 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79754 entries, memory: 950.5 KB + pending tuples: 0 max pending: 0 zombies: 2 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00951 sec ] + [ GrB_select (wait:A 2 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0175 sec ] + [ GrB_Matrix_nvals + 1.19e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.15333e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51226 +bucket 2: 28248 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79475 + vlen: 2003 nvec_nonempty: 1911 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 969972 shallow: 0 total: 969972 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc038f000 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 635800 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317900 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79474 entries, memory: 947.2 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 27 entries [29:55] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.14223e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51092 +bucket 2: 28242 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79335 + vlen: 2003 nvec_nonempty: 1901 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 968292 shallow: 0 total: 968292 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634680 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317340 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79334 entries, memory: 945.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00915 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13684e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 51032 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79267 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 967476 shallow: 0 total: 967476 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014bf00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 634136 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 317068 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79266 entries, memory: 944.8 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00838 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13368e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50992 +bucket 2: 28234 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79227 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966996 shallow: 0 total: 966996 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc014fe00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633816 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316908 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79226 entries, memory: 944.3 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00915 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.13162e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50970 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79201 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966684 shallow: 0 total: 966684 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633608 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316804 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79200 entries, memory: 944.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00839 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12846e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50930 +bucket 2: 28230 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79161 + vlen: 2003 nvec_nonempty: 1896 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 966204 shallow: 0 total: 966204 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 633288 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316644 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79160 entries, memory: 943.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00952 sec ] + [ GrB_select (hyper to sparse) + 0.00916 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12451e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50884 +bucket 2: 28226 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79111 + vlen: 2003 nvec_nonempty: 1893 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965604 shallow: 0 total: 965604 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632888 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316444 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79110 entries, memory: 943.0 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00947 sec ] + [ GrB_select (hyper to sparse) + 0.00837 sec ] + [ GrB_Matrix_nvals + 1.04e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253b00 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00911 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_reduce work:79078 gpus:4 has_cheeseburger 1 +(cuda reduce launch 320 threads in 1 blocks) + 0.000118 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.12198e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 50856 +bucket 2: 28222 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 79079 + vlen: 2003 nvec_nonempty: 1891 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc00a6800 number of memory blocks: 4 + deep: 965220 shallow: 0 total: 965220 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0153a00 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 632632 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 316316 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 79078 entries, memory: 942.6 KB + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00948 sec ] + [ GrB_select (hyper to sparse) + 0.00835 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:3.04165e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49922 +bucket 2: 28132 +bucket 3: 0 +bucket 4: 0 + + 2003x2003 GraphBLAS uint32_t matrix, sparse by col, ints: 64/64 + max # entries: 78055 + vlen: 2003 nvec_nonempty: 1878 nvec: 2003 plen: 2003 vdim: 2003 + hyper_switch 0.0625 bitmap_switch 0.4 + sparsity control: hyper/sparse/bitmap/full + header 0x7effc0253c00 number of memory blocks: 4 + deep: 952932 shallow: 0 total: 952932 + GraphBLAS Type: uint32_t size: 4 + ->h: (nil) shallow: 0 size: 0 + ->p: 0x7effc0157900 shallow: 0 size: 16032 + ->i: 0x7effc0698c00 shallow: 0 size: 624440 + ->b: (nil) shallow: 0 size: 0 + ->x: 0x7effc07d5c00 shallow: 0 size: 312220 + ->Y: (nil) shallow: 0 no_hyper_hash: 0 + C result from dot3 cuda A'*B, 78054 entries, memory: 930.6 KB + pending tuples: 0 max pending: 0 zombies: 60 + + column: 0 : 29 entries [0:28] + row 1: 22 + row 2: 14 + row 3: 20 + row 4: 26 + row 5: 24 + row 6: 22 + row 7: 22 + row 8: 14 + row 9: 20 + row 10: 20 + row 11: 18 + row 36: 10 + row 40: 10 + row 41: 10 + row 156: 19 + row 157: 19 + row 158: 12 + row 159: 18 + row 160: 18 + row 161: 18 + row 162: 23 + row 163: 18 + row 164: 13 + row 165: 16 + row 166: 22 + row 167: 24 + row 168: 9 + row 172: 9 + row 173: 10 + column: 1 : 23 entries [29:51] + ... + Pending (nil) + + 0.00943 sec ] + [ GrB_select (wait:A 60 zombies, 0 pending) (hyper to sparse) (hyper to sparse) + 0.0173 sec ] + [ GrB_Matrix_nvals + 1.12e-07 sec ] + [ GrB_mxm C=A'*B, masked_dot_product (dot3) (S{S} = S'*S) work:2.97438e+06 GPUs:4 (GPU dot3) (GPU C created and copied from M) +zombies: 0 +bucket 1: 49104 +bucket 2: 794687 +bucket 3: 0 +bucket 4: 0