Commit

Merge pull request #367 from DrTimothyAldenDavis/dev2
9.4.2
DrTimothyAldenDavis authored Nov 20, 2024
2 parents e3f309c + 74a5370 commit bde76fb

Showing 3,244 changed files with 76,990 additions and 81,224 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
11 changes: 10 additions & 1 deletion CMakeLists.txt
@@ -42,7 +42,7 @@ endif ( )

# CUDA is under development for now, and not deployed in production:
set ( GRAPHBLAS_USE_CUDA OFF )
# set ( GRAPHBLAS_USE_CUDA ON ) # FIXME: use this for CUDA development
# set ( GRAPHBLAS_USE_CUDA ON ) # use this for CUDA development only

include ( SuiteSparsePolicy )

@@ -210,17 +210,25 @@ configure_file ( "Config/README.md.in"
include_directories ( ${PROJECT_SOURCE_DIR} Source Include Config
xxHash lz4 zstd zstd/zstd_subset JITpackage Demo/Include rmm_wrap
# include all Source/* folders that have include/ or template/ subfolders:
Source/add
Source/apply
Source/assign
Source/builder
Source/builtin
Source/callback
Source/concat
Source/convert
Source/cumsum
Source/emult
Source/ewise
Source/extract
Source/hyper
Source/ij
Source/jit_kernels
Source/kronecker
Source/mask
Source/math
Source/matrix
Source/memory
Source/monoid
Source/mxm
@@ -231,6 +239,7 @@ include_directories ( ${PROJECT_SOURCE_DIR} Source Include Config
Source/select
Source/split
Source/slice
Source/sort
Source/transpose
Source/type
Source/wait
18 changes: 18 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,18 @@
# Contributing to SuiteSparse:GraphBLAS

To add an issue for a bug report (gasp!) or a feature request,
you can use the issue tracker on github.com, at
[`https://github.com/DrTimothyAldenDavis/GraphBLAS/issues`](https://github.com/DrTimothyAldenDavis/GraphBLAS/issues).

To contribute code, you can submit a pull request. To do so,
you must first agree to the Contributor License
in the [`Contributor_License`](Contributor_License) folder.
Sign and date the PDF, and email it to me at
[email protected]. Pull requests will only be
merged into SuiteSparse:GraphBLAS after I receive your email with
the signed PDF.

Do not submit a pull request to the default branch.
Instead, use the dev2 branch.

177 changes: 0 additions & 177 deletions CONTRIBUTOR-LICENSE.txt

This file was deleted.

8 changes: 4 additions & 4 deletions CUDA/GB_cuda_AxB_dot3.cpp
@@ -52,7 +52,7 @@ GrB_Info GB_cuda_AxB_dot3 // C<M> = A'*B using dot product method

// FIXME: pass in a stream instead, or checkout a stream
CUDA_OK (cudaStreamCreate (&stream)) ;
GpuTimer kernel_timer;

//--------------------------------------------------------------------------
// check inputs
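
The FIXME above notes that this routine should receive a stream from its
caller (or check one out of a stream pool) rather than creating one on
every call.  A minimal sketch of that pattern, written as a hypothetical
caller and not as the library's actual code:

#include <cuda_runtime.h>

// Sketch only (hypothetical caller, not GraphBLAS code): the caller
// creates and owns the stream, passes it to the dot3 routine, and
// destroys it afterwards, instead of the routine creating its own.
static cudaError_t caller_owns_the_stream (void)
{
    cudaStream_t stream ;
    cudaError_t err = cudaStreamCreate (&stream) ;
    if (err != cudaSuccess) return (err) ;
    // ... pass `stream` into the dot3 routine and launch its kernels ...
    err = cudaStreamSynchronize (stream) ;
    cudaStreamDestroy (stream) ;
    return (err) ;
}
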
@@ -165,7 +165,7 @@ GrB_Info GB_cuda_AxB_dot3 // C<M> = A'*B using dot product method
GB_OK (GB_new_bix (&C, // sparse or hyper (from M), existing header
ctype, cvlen, cvdim, GB_Ap_malloc, true,
M_sparsity, false, M->hyper_switch, cnvec,
cnz+1, // add one to cnz for GB_cumsum of Cwork
cnz+1, // add one to cnz for cumsum of Cwork
true, C_iso)) ;

//--------------------------------------------------------------------------
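
The cnz+1 above follows the usual prefix-sum convention: an array of cnz
counts is scanned in place into cnz+1 offsets, with the extra final slot
holding the grand total.  An illustrative serial version of that
convention (GraphBLAS's own cumsum is parallel; the name cumsum_in_place
is chosen here for exposition only):

#include <stdint.h>

// Illustrative only: before the scan, Cwork [k] holds the count for
// entry k; afterwards, Cwork [k] is the starting offset of entry k and
// the extra last slot, Cwork [cnz], holds the grand total.
static void cumsum_in_place (int64_t *Cwork, int64_t cnz)
{
    int64_t total = 0 ;
    for (int64_t k = 0 ; k < cnz ; k++)
    {
        int64_t count = Cwork [k] ;
        Cwork [k] = total ;
        total += count ;
    }
    Cwork [cnz] = total ;
}
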
@@ -219,7 +219,7 @@ GrB_Info GB_cuda_AxB_dot3 // C<M> = A'*B using dot product method

// M might be very very sparse. A(:,i) is not needed if M(:,i) is empty.
// Likewise, B(:,j) is not needed if M(:,j) is empty. For now, try this
// heuristic: if M is hypersparse, then do not prefetch A->b or A->x.

int prefetch_b = (M_is_hyper) ? 0 : GB_PREFETCH_B ;
int prefetch_x = (M_is_hyper) ? 0 : GB_PREFETCH_X ;
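
One way to realize the heuristic described above (skip prefetching A->b
and A->x whenever the mask M is hypersparse) is to gate each
unified-memory prefetch on that flag.  A sketch with a hypothetical
helper, not the code this commit adds:

#include <cuda_runtime.h>
#include <stdbool.h>
#include <stddef.h>

// Sketch only (hypothetical helper, not GraphBLAS code): prefetch one
// unified-memory array to the target GPU, unless the mask is hypersparse,
// in which case the prefetch is skipped per the heuristic above.
static cudaError_t maybe_prefetch (const void *p, size_t nbytes,
    int device, cudaStream_t stream, bool mask_is_hyper)
{
    if (mask_is_hyper || p == NULL || nbytes == 0) return (cudaSuccess) ;
    return (cudaMemPrefetchAsync (p, nbytes, device, stream)) ;
}

A->b and A->x would each go through such a helper before the kernel
launch, with prefetch_b and prefetch_x deciding whether the call is made.
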
@@ -252,6 +252,6 @@ GrB_Info GB_cuda_AxB_dot3 // C<M> = A'*B using dot product method
//--------------------------------------------------------------------------

GB_FREE_WORKSPACE ;
return GrB_SUCCESS;
}

2 changes: 1 addition & 1 deletion CUDA/GB_cuda_AxB_dot3_jit.cpp
@@ -60,6 +60,6 @@ GrB_Info GB_cuda_AxB_dot3_jit

GB_jit_dl_function GB_jit_kernel = (GB_jit_dl_function) dl_function ;
return (GB_jit_kernel (C, M, A, B, stream, device, number_of_sms,
&GB_callback)) ;
&GB_callback, semiring->multiply->theta)) ;
}

5 changes: 3 additions & 2 deletions CUDA/GB_cuda_apply_bind1st_jit.cpp
@@ -28,8 +28,9 @@ GrB_Info GB_cuda_apply_bind1st_jit
GB_jit_encoding encoding ;
char *suffix ;
uint64_t hash = GB_encodify_ewise (&encoding, &suffix,
GB_JIT_CUDA_KERNEL_APPLYBIND1, false, false, false, GxB_FULL, ctype,
NULL, false, false, op, false, NULL, A) ;
GB_JIT_CUDA_KERNEL_APPLYBIND1, false,
false, false, GxB_FULL, ctype, NULL, false, false,
op, false, false, NULL, A) ;

//--------------------------------------------------------------------------
// get the kernel function pointer, loading or compiling it if needed
5 changes: 3 additions & 2 deletions CUDA/GB_cuda_apply_bind2nd_jit.cpp
@@ -28,8 +28,9 @@ GrB_Info GB_cuda_apply_bind2nd_jit
GB_jit_encoding encoding ;
char *suffix ;
uint64_t hash = GB_encodify_ewise (&encoding, &suffix,
GB_JIT_CUDA_KERNEL_APPLYBIND2, false, false, false, GxB_FULL, ctype,
NULL, false, false, op, false, A, NULL) ;
GB_JIT_CUDA_KERNEL_APPLYBIND2, false,
false, false, GxB_FULL, ctype, NULL, false, false,
op, false, false, A, NULL) ;

//--------------------------------------------------------------------------
// get the kernel function pointer, loading or compiling it if needed
5 changes: 3 additions & 2 deletions CUDA/GB_cuda_apply_unop_jit.cpp
@@ -29,7 +29,8 @@ GrB_Info GB_cuda_apply_unop_jit
GB_jit_encoding encoding ;
char *suffix ;
uint64_t hash = GB_encodify_apply (&encoding, &suffix,
GB_JIT_CUDA_KERNEL_APPLYUNOP, GxB_FULL, false, ctype, op, flipij, A) ;
GB_JIT_CUDA_KERNEL_APPLYUNOP, GxB_FULL, false, ctype, op, flipij,
GB_sparsity (A), true, A->type, A->iso, A->nzombies) ;

//--------------------------------------------------------------------------
// get the kernel function pointer, loading or compiling it if needed
@@ -48,4 +49,4 @@ GrB_Info GB_cuda_apply_unop_jit

GB_jit_dl_function GB_jit_kernel = (GB_jit_dl_function) dl_function ;
return (GB_jit_kernel (Cx, A, ythunk, stream, gridsz, blocksz)) ;
}
2 changes: 1 addition & 1 deletion CUDA/GB_cuda_colscale_jit.cpp
@@ -30,7 +30,7 @@ GrB_Info GB_cuda_colscale_jit
uint64_t hash = GB_encodify_ewise (&encoding, &suffix,
GB_JIT_CUDA_KERNEL_COLSCALE, false,
false, false, GB_sparsity (C), C->type, NULL, false, false,
binaryop, flipxy, A, D) ;
binaryop, false, flipxy, A, D) ;

//--------------------------------------------------------------------------
// get the kernel function pointer, loading or compiling it if needed
3 changes: 3 additions & 0 deletions CUDA/GB_cuda_get_device_count.cu
@@ -15,7 +15,10 @@ bool GB_cuda_get_device_count // true if OK, false if failure
int *gpu_count // return # of GPUs in the system
)
{
(*gpu_count) = 0 ;
cudaError_t err = cudaGetDeviceCount (gpu_count) ;
printf ("GB_cuda_get_device_count: %d, cudaError_t: %d\n",
*gpu_count, err) ;
return (err == cudaSuccess) ;
}
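
Initializing *gpu_count to zero before the CUDA query means that a caller
which ignores the boolean status still sees a well-defined count when
cudaGetDeviceCount fails.  A minimal caller sketch (hypothetical; the
helper gpus_available is not part of the library):

#include <stdbool.h>

bool GB_cuda_get_device_count (int *gpu_count) ;    // declared above

// Hypothetical caller: a failed query leaves ngpus at a well-defined 0.
static bool gpus_available (void)
{
    int ngpus = 0 ;
    return (GB_cuda_get_device_count (&ngpus) && ngpus > 0) ;
}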

10 changes: 8 additions & 2 deletions CUDA/GB_cuda_init.c
@@ -19,13 +19,18 @@ GrB_Info GB_cuda_init (void)
{

// get the GPU properties
if (!GB_Global_gpu_count_set (true)) return (GrB_PANIC) ;
if (!GB_Global_gpu_count_set (true))
{
printf ("GB_cuda_init line %d\n", __LINE__) ;
return (GrB_PANIC) ;
}
int gpu_count = GB_Global_gpu_count_get ( ) ;
for (int device = 0 ; device < 1 ; device++) // TODO for GPU: gpu_count
{
// query the GPU and then warm it up
if (!GB_Global_gpu_device_properties_get (device))
{
printf ("GB_cuda_init line %d\n", __LINE__) ;
return (GrB_PANIC) ;
}
}
@@ -41,14 +46,15 @@ GrB_Info GB_cuda_init (void)
// of the work. Alternatively, move GB_cuda_init here (if so,
// ensure that it doesn't depend on any other initializations
// below).
256 * 1000000L, 256 * 100000000L, 1) ;
256 * 1000000L, 1024 * 100000000L, 1) ; // FIXME: ask the GPU(s)
}

// warm up the GPUs
for (int device = 0 ; device < 1 ; device++) // TODO for GPU: gpu_count
{
if (!GB_cuda_warmup (device))
{
printf ("GB_cuda_init line %d\n", __LINE__) ;
return (GrB_PANIC) ;
}
}
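The enlarged pool limit (1024 * 100000000L) still carries a FIXME to ask
the GPU(s) rather than hard-coding the size.  One way to do that is
cudaMemGetInfo, which reports the free and total memory of the currently
selected device.  A sketch under that assumption (the helper gpu_pool_cap
and the one-half fraction are placeholders, not the committed code):

#include <cuda_runtime.h>
#include <stddef.h>

// Sketch only: derive a pool cap from the device's reported memory
// instead of a hard-coded constant.
static size_t gpu_pool_cap (int device)
{
    size_t free_bytes = 0, total_bytes = 0 ;
    if (cudaSetDevice (device) != cudaSuccess) return (0) ;
    if (cudaMemGetInfo (&free_bytes, &total_bytes) != cudaSuccess) return (0) ;
    return (total_bytes / 2) ;  // placeholder: cap at half of device memory
}
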
6 changes: 2 additions & 4 deletions CUDA/GB_cuda_reduce_to_scalar.cpp
@@ -76,13 +76,11 @@ GrB_Info GB_cuda_reduce_to_scalar
int gridsz = (int) raw_gridsz ;

// FIXME: GB_enumify_reduce is called twice: here (to get has_cheeseburger)
// and in GB_cuda_reduce_to_scalar_jit. Can we just call it once? One
// solution: The code from here to the call to GB_cuda_reduce_to_scalar_jit
// could be added to the GB_cuda_reduce_to_scalar_jit function itself.
// and in GB_cuda_reduce_to_scalar_jit. Can we just call it once?

uint64_t rcode ;
GB_enumify_reduce (&rcode, monoid, A) ;
bool has_cheeseburger = GB_RSHIFT (rcode, 27, 1) ;
bool has_cheeseburger = GB_RSHIFT (rcode, 16, 1) ;
GBURBLE ("has_cheeseburger %d\n", has_cheeseburger) ;

// determine the kind of reduction: partial (to &V), or complete
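The has_cheeseburger flag is a single bit of the code produced by
GB_enumify_reduce, and this commit moves the extraction from bit 27 to
bit 16 so that it matches the updated encoding.  Assuming GB_RSHIFT
(x,k,b) means "shift x right by k bits and keep the low b bits" (an
assumption; the macro itself is defined elsewhere in the GraphBLAS
sources), the extraction is equivalent to:

#include <stdbool.h>
#include <stdint.h>

// Assumed reading of GB_RSHIFT: extract the 1-bit field at bit 16 of the
// enumified reduce code.
static bool get_has_cheeseburger (uint64_t rcode)
{
    return (((rcode >> 16) & 1) != 0) ;
}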