diff --git a/CUDA/GB_cuda_common_jitFactory.hpp b/CUDA/GB_cuda_common_jitFactory.hpp index 896dc1609f..a4dbf7f421 100644 --- a/CUDA/GB_cuda_common_jitFactory.hpp +++ b/CUDA/GB_cuda_common_jitFactory.hpp @@ -43,6 +43,7 @@ static const std::vector compiler_flags{ "-I.", "-I..", "-I../templates", + "-I../../Source/Shared", // Add includes relative to GRAPHBLAS_SOURCE_PATH variable "-I" + jit::get_user_graphblas_source_path() + "/CUDA", diff --git a/CUDA/GB_hash.h b/CUDA/GB_hash.h deleted file mode 120000 index 390a314548..0000000000 --- a/CUDA/GB_hash.h +++ /dev/null @@ -1 +0,0 @@ -../Source/Shared/GB_hash.h \ No newline at end of file diff --git a/CUDA/GB_hash.h b/CUDA/GB_hash.h new file mode 100644 index 0000000000..5e46e721af --- /dev/null +++ b/CUDA/GB_hash.h @@ -0,0 +1,48 @@ +//------------------------------------------------------------------------------ +// GB_hash.h: definitions for hashing +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//------------------------------------------------------------------------------ + +#ifndef GB_HASH_H +#define GB_HASH_H + +//------------------------------------------------------------------------------ +// functions for the Hash method for C=A*B, and for the A->Y hyperhash +//------------------------------------------------------------------------------ + +// initial hash function, for where to place the integer i in the hash table. +// hash_bits is a bit mask to compute the result modulo the hash table size, +// which is always a power of 2. The function is (i*257) & (hash_bits). +#define GB_HASHF(i,hash_bits) ((((i) << 8) + (i)) & (hash_bits)) + +// #define GB_HASHF2(i,hash_bits) ((i) & (hash_bits)) +// lots of intentional collisions: +// #define GB_HASHF2(i,hash_bits) ((i >> 2) & (hash_bits)) + +// lots of intentional collisions: but blocks are scattered + #define GB_HASHF2(i,hash_bits) ((((i) >> 2) + 17L*((i) >> 8)) & (hash_bits)) + +// rehash function, for subsequent hash lookups if the initial hash function +// refers to a hash entry that is already occupied. Linear probing is used, +// so the function does not currently depend on i. On input, hash is equal +// to the current value of the hash function, and on output, hash is set to +// the new hash value. +#define GB_REHASH(hash,i,hash_bits) hash = ((hash + 1) & (hash_bits)) + +// The hash functions and their parameters are modified from this paper: + +// [2] Yusuke Nagasaka, Satoshi Matsuoka, Ariful Azad, and Aydin Buluc. 2018. +// High-Performance Sparse Matrix-Matrix Products on Intel KNL and Multicore +// Architectures. In Proc. 47th Intl. Conf. on Parallel Processing (ICPP '18). +// Association for Computing Machinery, New York, NY, USA, Article 34, 1–10. +// DOI:https://doi.org/10.1145/3229710.3229720 + +// The hash function in that paper is (i*107)&(hash_bits). Here, the term +// 107 is replaced with 257 to allow for a faster hash function computation. + +#endif + diff --git a/CUDA/TODO.txt b/CUDA/TODO.txt index b4bb666ca1..75babdcf12 100644 --- a/CUDA/TODO.txt +++ b/CUDA/TODO.txt @@ -8,3 +8,4 @@ test complex and user-defined ops (wildtype) on the GPU reduce: do any monoid terminal condition? +GxB_VOID diff --git a/Makefile b/Makefile index 1b3177e2e6..7b1272db8b 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,10 @@ cuda: local: ( cd build && cmake $(CMAKE_OPTIONS) -DSUITESPARSE_LOCAL=1 .. && $(MAKE) --jobs=${JOBS} ) +# install in SuiteSparse/lib and SuiteSparse/include and /usr/local +global: + ( cd build && cmake $(CMAKE_OPTIONS) -DSUITESPARSE_LOCAL=0 .. && $(MAKE) --jobs=${JOBS} ) + # compile with -g debug: ( cd build && cmake -DCMAKE_BUILD_TYPE=Debug $(CMAKE_OPTIONS) .. && $(MAKE) --jobs=$(JOBS) )